from collections import defaultdict
from itertools import ifilter

import luigi
import yt.wrapper as yt

from lib import graphite_sender
from lib.luigi import yt_luigi
from matching.device_matching.app_metrica import app_metrica_month
from matching.human_matching import graph_vertices
from matching.yuid_matching import graph_merge_month
from rtcconf import config
from utils import mr_utils as mr
from v2 import ids


def mapper_key_mac_pairs(rec):
    """Turn one input record into zero or more (key, mac, id_type) rows.

    Records whose 'id_type' is listed in ids.CRYPTA_DEVICE_IDS are keyed by
    rec['id'] and emit one 'deviceid' row per entry of the optional 'macs'
    field. Every other record is keyed by rec['yuid'] and emits a single
    'yuid-not-matched' row whose MAC is taken from rec['id_value'].
    """
    if rec['id_type'] not in ids.CRYPTA_DEVICE_IDS:
        yield {'key': rec['yuid'], 'mac': rec['id_value'], 'id_type': 'yuid-not-matched'}
        return

    # The id is read before looking at 'macs', so a device record without
    # 'id' fails even when it carries no MAC addresses.
    device_id = rec['id']
    for mac_address in rec.get('macs') or ():
        yield {'key': device_id, 'mac': mac_address, 'id_type': 'deviceid'}


def reducer_key_mac_pairs(keys, recs):
    """Emit one row per distinct non-empty MAC of a (key, id_type) group.

    keys -- reduce key; 'key' and 'id_type' are read from it.
    recs -- iterable of records of the group; 'mac' may be missing or empty.

    Yields {'key', 'mac', 'id_type'} dicts ordered by MAC.
    """
    # Skip missing/empty MACs while collecting instead of filtering them out
    # afterwards; this also avoids the Python-2-only itertools.ifilter.
    macs = {rec['mac'] for rec in recs if rec.get('mac')}
    for mac in sorted(macs):
        yield {'key': keys['key'], 'mac': mac, 'id_type': keys['id_type']}


def reducer_take_cryptaids(keys, recs):
    """Pair every MAC of a group with every crypta id of the same group.

    keys -- reduce key (unused).
    recs -- iterable of records; a record with a non-empty 'crypta_id'
            contributes that crypta id (its 'mac', if any, is ignored),
            otherwise a non-empty 'mac' contributes a MAC address.

    Yields {'crypta_id', 'mac'} dicts, MACs in sorted order. Nothing is
    emitted when the group has no crypta id or no MAC.
    """
    cryptaids = set()
    macs = set()
    for record in recs:
        crypta_id = record.get('crypta_id')
        if crypta_id:
            cryptaids.add(crypta_id)
            continue
        mac = record.get('mac')
        if mac:
            macs.add(mac)

    # Only truthy MACs were collected, so no extra filtering pass (and no
    # Python-2-only itertools.ifilter) is needed here.
    for mac in sorted(macs):
        for crypta_id in cryptaids:
            yield {'crypta_id': crypta_id, 'mac': mac}


def reducer_take_yuids(keys, recs):
    """Attach every MAC of a group to every typed key of the same group.

    keys -- reduce key (unused).
    recs -- iterable of records; a record with a non-empty 'id_type'
            contributes its 'key' under that type, otherwise a non-empty
            'mac' contributes a MAC address.

    Yields {'key', 'id_type', 'mac'} dicts; for each key the MACs come in
    sorted order. Nothing is emitted when the group has no typed key or
    no MAC.
    """
    key_ids = defaultdict(list)
    macs = set()
    for record in recs:
        id_type = record.get('id_type')
        if id_type:
            key_ids[id_type].append(record.get('key'))
            continue
        mac = record.get('mac')
        if mac:
            macs.add(mac)

    # Sort once: the MAC list is the same for every key, so re-sorting it
    # inside the nested loops was wasted work.
    sorted_macs = sorted(macs)
    # Only truthy id types and MACs were collected above, so no additional
    # filtering is required while iterating.
    for id_type in key_ids:
        for key in key_ids[id_type]:
            for mac in sorted_macs:
                yield {'key': key, 'id_type': id_type, 'mac': mac}


def final_reducer_merge(keys, recs):
    """Emit at most one row per (key, mac) pair with a single id type.

    keys -- reduce key; 'key' and 'mac' are read from it.
    recs -- iterable of records of the pair; 'id_type' may be missing.

    If the only id type seen is 'yuid-not-matched' (or there are no records
    at all) a 'yuid-not-matched' row is emitted. Otherwise exactly one of
    the other non-empty id types is emitted; when several are present the
    choice follows set iteration order and is therefore arbitrary. If all
    remaining id types are empty, nothing is emitted.
    """
    id_types = {record.get('id_type', '') for record in recs}
    id_types.discard('yuid-not-matched')

    if not id_types:
        yield {'key': keys['key'], 'id_type': 'yuid-not-matched', 'mac': keys['mac']}
        return

    # Take the first non-empty type only, so each (key, mac) pair is
    # represented by a single output row.
    chosen = next((id_type for id_type in id_types if id_type), None)
    if chosen:
        yield {'key': keys['key'], 'id_type': chosen, 'mac': keys['mac']}


@yt.aggregator
def count_mac_with_types_mapper(recs):
    """Pre-aggregate rows into per-id-type counts.

    Iterates over all of `recs`, counts how many rows carry each 'id_type'
    (rows without one are counted under '' and then dropped), and yields one
    {'id_type', 'count'} row per non-empty type.
    """
    counts_by_type = defaultdict(int)
    for row in recs:
        counts_by_type[row.get('id_type', '')] += 1

    for type_name, seen in counts_by_type.items():
        if type_name:
            yield {'id_type': type_name, 'count': seen}


def count_mac_with_types_reducer(keys, recs):
    """Sum the partial 'count' values of one id type into a single row.

    keys -- reduce key; 'id_type' is read from it ('' when absent).
    recs -- iterable of records, each with a numeric 'count'.
    """
    total = sum(row['count'] for row in recs)
    yield {'id_type': keys.get('id_type', ''), 'count': total}


class MacAddressesWithIdsStatTask(yt_luigi.BaseYtTask):
    """Count MAC addresses per id type and send the counts to graphite.

    run() computes the statistics twice: once over the monthly device table
    and once over the daily one, each combined with the vmetro yuid/MAC
    table, and reports them under different graphite keys.
    """

    # Date of the run; also used to build the dated input folder paths.
    date = luigi.Parameter()
    # Both tasks are only forwarded to graph_vertices.CopyVerticesToDict.
    exact_task = luigi.TaskParameter()
    fuzzy_task = luigi.TaskParameter()

    def input_folders(self):
        """Return the named input folders used by this task."""
        return {
            'dict': config.GRAPH_YT_DICTS_FOLDER,
            'ids_storage': config.CRYPTA_IDS_STORAGE,
            'mobile': config.YT_OUTPUT_FOLDER + self.date + '/mobile/',
            'long_live': config.YT_OUTPUT_FOLDER + self.date + '/yuid_raw/long_live/'
        }

    def requires(self):
        """Return the upstream tasks this task depends on."""
        return [
            app_metrica_month.AppMetricaDictMergeMonthTask(self.date),
            graph_merge_month.IncrementalDayAndDumpMergeTask(self.date),
            graph_vertices.CopyVerticesToDict(self.date, exact_task=self.exact_task, fuzzy_task=self.fuzzy_task)
        ]

    def output(self):
        """Return the local marker target that signals completion for the date."""
        return yt_luigi.TodayFileTarget(config.LOCAL_OUTPUT_FOLDER + 'graph_macs_counts_stats', self.date)

    def send_stats(self, input_tables, send_key):
        """Compute per-id-type MAC counts for input_tables and send them.

        input_tables -- list of YT table paths fed to mapper_key_mac_pairs.
        send_key     -- graphite key passed to the graphite sender.

        Pipeline:
          1. map the inputs to (key, mac, id_type) rows, deduplicated per
             (key, id_type);
          2. reduce them together with exact_vertices_by_key by 'key' to get
             (crypta_id, mac) rows;
          3. reduce those together with exact_vertices_by_crypta_id by
             'crypta_id' to get (key, id_type, mac) rows;
          4. merge steps 1 and 3 by (key, mac), keeping one id type per pair;
          5. count rows per id type.

        The temporary tables are dropped only after a successful send.
        """
        dict_folder = self.in_f('dict')
        matching_input = dict_folder + 'matching/exact_vertices_by_key'
        matching_int_input = dict_folder + 'matching/exact_vertices_by_crypta_id'

        key_with_macs = dict_folder + 'tmp_table_mac_addressess_keys_with_macs'
        cryptaids_with_macs = dict_folder + 'tmp_table_mac_addressess_cryptaids'
        tmp_all_ids_with_macs_table = dict_folder + 'tmp_table_mac_addressess_all_ids_not_ready'
        all_ids_with_macs_table = dict_folder + 'tmp_table_mac_addressess_all_ids'
        final_stats_table = dict_folder + 'tmp_table_mac_addressess_all_stats'

        yt.run_map_reduce(
            mapper_key_mac_pairs,
            reducer_key_mac_pairs,
            input_tables,
            key_with_macs,
            reduce_by=['key', 'id_type']
        )
        yt.run_map_reduce(
            None,
            reducer_take_cryptaids,
            [key_with_macs, matching_input],
            cryptaids_with_macs,
            reduce_by=['key']
        )
        yt.run_map_reduce(
            None,
            reducer_take_yuids,
            [cryptaids_with_macs, matching_int_input],
            tmp_all_ids_with_macs_table,
            reduce_by=['crypta_id']
        )
        yt.run_map_reduce(
            None,
            final_reducer_merge,
            [key_with_macs, tmp_all_ids_with_macs_table],
            all_ids_with_macs_table,
            reduce_by=['key', 'mac']
        )
        yt.run_map_reduce(
            count_mac_with_types_mapper,
            count_mac_with_types_reducer,
            all_ids_with_macs_table,
            final_stats_table,
            reduce_by='id_type'
        )

        stats = {}
        for row in yt.read_table(final_stats_table):
            stats[row.get('id_type', 'errors')] = row.get('count', 0)

        # Report 'deviceid' under the name 'devids'. Default to 0 so that an
        # input without any device rows yields a zero metric instead of a
        # KeyError that would abort before sending and before cleanup.
        stats['devids'] = stats.pop('deviceid', 0)
        # Total over every id type whose name starts with 'yuid'.
        stats['yuids'] = sum(
            count for id_type, count in stats.iteritems() if id_type.startswith('yuid')
        )
        graphite_sender.to_graphite_sender_batch(send_key, stats.iteritems(), self.date)

        for tmp_table in (
            key_with_macs,
            cryptaids_with_macs,
            tmp_all_ids_with_macs_table,
            all_ids_with_macs_table,
            final_stats_table,
        ):
            mr.drop(tmp_table)

    def run(self):
        """Send monthly and daily statistics, then mark the task as done."""
        # PER MONTH
        devmac_input = self.in_f('ids_storage') + 'device_id/app_metrica_month'
        vmetro_input = self.in_f('long_live') + 'active_yuids_mac_vmetro'
        send_key = 'graph_macs_counts.v_type'
        self.send_stats([devmac_input, vmetro_input], send_key)

        # PER DAY
        devmac_day_input = self.in_f('mobile') + 'dev_info_yt'
        send_key = 'graph_macs_counts_day.v_type'
        self.send_stats([devmac_day_input, vmetro_input], send_key)

        yt_luigi.TodayFileTarget.done(config.LOCAL_OUTPUT_FOLDER + 'graph_macs_counts_stats', self.date)


if __name__ == '__main__':
    # Manual entry point: runs the task directly for a fixed date against
    # the YT proxy named by the RTCRYPTA_MR_SERVER environment variable.
    import os

    proxy = os.getenv('RTCRYPTA_MR_SERVER')
    yt.config.set_proxy(proxy)
    # NOTE(review): exact_task and fuzzy_task are declared on the task
    # without defaults but are not passed here -- confirm this still works.
    task = MacAddressesWithIdsStatTask(date='2017-07-05')
    task.run()
