import luigi
import yt.wrapper as yt

from lib import graphite_sender
from lib.luigi import yt_luigi
from matching.human_matching.graph_vertices import GraphVerticesExact
from matching.yuid_matching.enrich.enrich_social_with_people_search import EnrichSocialIdsWithPeopleSearch
from matching.yuid_matching.enrich.puid_yuid_passport import ExpandPuidYuidMatching
from matching.yuid_matching.graph_merge_month import IncrementalDayAndDumpMergeTask, FullMonthYuidMergeTask
from rtcconf import config


def mapper_find_unique_sources(rec):
    """Emit a source label for every side of a pair that has exactly one source.

    Reads 'yuid1_sources', 'yuid2_sources', 'pair_source' and 'pair_type'
    from ``rec`` (missing fields default to an empty list / empty string).

    A side contributes a record only when its source list holds a single
    entry. The second side is skipped when its single source equals the
    single source already emitted for the first side. The label is prefixed
    with 'ok_' when pair_source is 'ok' and with 'avito_' when pair_source
    is 'avito_watch_log'; otherwise the source is emitted unprefixed.

    Yields dicts of the form {'source': <label>, 'pair_type': <pair_type>}.
    """
    first_side = rec.get('yuid1_sources', [])
    second_side = rec.get('yuid2_sources', [])
    origin = rec.get('pair_source', '')
    kind = rec.get('pair_type', '')

    # Decide the label prefix once; None means "emit the source as is".
    if origin == 'ok':
        prefix = 'ok_'
    elif origin == 'avito_watch_log':
        prefix = 'avito_'
    else:
        prefix = None

    def labelled(source):
        # Without a prefix the source goes straight through str(), exactly
        # like the unprefixed branch is expected to behave for any value.
        name = source if prefix is None else prefix + source
        return {'source': str(name), 'pair_type': kind}

    already_emitted = None
    if len(first_side) == 1:
        already_emitted = first_side[0]
        yield labelled(already_emitted)
    if len(second_side) == 1 and already_emitted != second_side[0]:
        yield labelled(second_side[0])


def reducer_by_key(key, recs):
    """Merge all records sharing one key into a single record.

    The 'yuid1_sources' and 'yuid2_sources' lists of every record are
    concatenated. For each record whose pair_type is 'd_y' an extra entry
    derived from its pair_source is appended to the first list: the part
    before the first underscore, extended with '_log' when the second
    underscore-separated part is exactly 'log' (watch_log, access_log, ...).

    The 'pair_type' and 'pair_source' of the output are those of the last
    record seen (empty strings when there are no records).
    """
    first_sources = []
    second_sources = []
    last_type = ''
    last_source = ''
    for row in recs:
        last_type = row.get('pair_type', '')
        first_sources.extend(row.get('yuid1_sources', []))
        second_sources.extend(row.get('yuid2_sources', []))
        last_source = row.get('pair_source', '')

        if last_type != 'd_y':
            continue

        parts = last_source.split('_')
        if parts[1:2] == ['log']:  # for watch_log, access_log, etc.
            first_sources.append(parts[0] + '_' + parts[1])
        else:
            first_sources.append(parts[0])

    merged = {
        'key': key['key'],
        'pair_type': last_type,
        'pair_source': last_source,
        'yuid1_sources': first_sources,
        'yuid2_sources': second_sources,
    }
    yield merged

def reducer_by_key_uniq(key, recs):
    """Emit a unit count for keys seen only as 'd_y' pairs from one source.

    Collects the 'yuid_sources' lists of all records of the key. A single
    {'count': 1, 'source': <source>} record is yielded only when every
    record has pair_type 'd_y' and the collected sources contain exactly one
    distinct value; otherwise nothing is yielded.

    NOTE(review): this reads 'yuid_sources', while reducer_by_key reads
    'yuid1_sources' / 'yuid2_sources' - confirm the input rows really carry
    a 'yuid_sources' field.
    """
    collected = []
    only_d_y = True
    for row in recs:
        if row.get('pair_type', '') != 'd_y':
            only_d_y = False
        collected.extend(row.get('yuid_sources', []))

    # The distinct-source check is evaluated only for pure 'd_y' keys.
    if only_d_y and len(set(collected)) == 1:
        yield {'count': 1, 'source': collected[0]}


def reducer_count_sources(key, recs):
    """Sum the 'count' field over all records of a (source, pair_type) key.

    Records without a 'count' field contribute 1. Yields one record with the
    key's 'source' and 'pair_type' and the resulting total in 'count'.
    """
    total = sum(row.get('count', 1) for row in recs)
    yield {'source': key['source'], 'pair_type': key['pair_type'], 'count': total}

def reducer_combiner_counts_uniq(key, recs):
    """Sum the 'count' field over all records of one source.

    Records without a 'count' field contribute 1. Yields one record with the
    key's 'source' and the resulting total in 'count'.
    """
    per_record = (row.get('count', 1) for row in recs)
    yield {'source': key['source'], 'count': sum(per_record)}


class SourceProfitTask(yt_luigi.BaseYtTask):
    """Count pair sources over the exact yuid pairs table and send them to graphite.

    Two sets of counters are produced from the 'yuid_pairs' table of the
    '<date>/exact/' folder:

    * per (source, pair_type) counts, sent under 'graph_pairs.sources';
    * per source counts built from reducer_by_key_uniq output, sent under
      'unique_pair_counts.sources' with a '.d_y' suffix.
    """

    date = luigi.Parameter()

    def requires(self):
        """Require GraphVerticesExact for the same date."""
        return GraphVerticesExact(self.date, vertices_type='exact', yuid_pairs_folder='pairs/')

    def output(self):
        """Return the 'graph_vertices_stat' target for ``self.date``."""
        target_path = config.LOCAL_OUTPUT_FOLDER + 'graph_vertices_stat'
        return yt_luigi.TodayFileTarget(target_path, self.date)

    def input_folders(self):
        """Map the 'yuid_pairs' alias to the '<date>/exact/' YT folder."""
        exact_folder = config.YT_OUTPUT_FOLDER + self.date + '/exact/'
        return {'yuid_pairs': exact_folder}

    def output_folders(self):
        """Map the 'yuid_pairs' alias to the '<date>/exact/' YT folder."""
        exact_folder = config.YT_OUTPUT_FOLDER + self.date + '/exact/'
        return {'yuid_pairs': exact_folder}

    def upload_to_graphite(self, source_count_table):
        """Send '<source>.<pair_type>' -> count metrics read from the given table."""
        metrics = {}
        for row in yt.read_table(source_count_table, raw=False):
            source, pair_type, count = row['source'], row['pair_type'], row['count']
            metrics[source + "." + pair_type] = count

        graphite_sender.to_graphite_sender_batch('graph_pairs.sources', metrics.iteritems(), self.date)

    def upload_to_graphite_uniq_y(self, source_count_table):
        """Send '<source>.d_y' -> count metrics read from the given table."""
        metrics = {}
        for row in yt.read_table(source_count_table, raw=False):
            source, count = row['source'], row['count']
            metrics[source + '.d_y'] = count

        graphite_sender.to_graphite_sender_batch('unique_pair_counts.sources', metrics.iteritems(), self.date)

    def run(self):
        """Run the YT operations, upload both metric sets and mark the task done."""
        yt.config.set_proxy(config.MR_SERVER)
        # NOTE(review): presumably a limit in bytes (~1 GB) - confirm against yt config docs.
        yt.config["memory_limit"] = 1000000000

        # All intermediate tables live next to the source pairs table.
        pairs = self.out_f('yuid_pairs') + 'yuid_pairs'
        pairs_by_key = pairs + '_reduced'
        source_counts = pairs + '_stats'
        uniq_by_key = pairs + '_uniq_y_reduced'
        uniq_counts = pairs + '_uniq_y_resulted'

        # Per (source, pair_type) counters: sort in place, merge records per
        # key, then count the single-source sides.
        yt.run_sort(pairs, sort_by='key')
        yt.run_reduce(reducer_by_key, pairs, pairs_by_key,
                      sort_by='key', reduce_by='key')
        yt.run_map_reduce(mapper_find_unique_sources, reducer_count_sources,
                          pairs_by_key, source_counts,
                          reduce_by=['source', 'pair_type'],
                          reduce_combiner=reducer_count_sources)
        self.upload_to_graphite(source_counts)

        # Per source counters of keys that reducer_by_key_uniq lets through.
        yt.run_reduce(reducer_by_key_uniq, pairs, uniq_by_key,
                      sort_by='key', reduce_by='key')
        yt.run_map_reduce(None, reducer_combiner_counts_uniq,
                          uniq_by_key, uniq_counts,
                          sort_by=['source'], reduce_by=['source'],
                          reduce_combiner=reducer_combiner_counts_uniq)
        self.upload_to_graphite_uniq_y(uniq_counts)

        yt_luigi.TodayFileTarget.done(config.LOCAL_OUTPUT_FOLDER + 'graph_vertices_stat', self.date)


class YuidIdBySourceStatsTask(yt_luigi.BaseYtTask):
    """Send row counts of the 'yuid_with_id_*' dictionary tables to graphite.

    Every table in the dicts folder whose name starts with 'yuid_with_id_'
    becomes one '<source>.<id_type>' metric under
    'graph_pairs.absolute.sources'.
    """

    date = luigi.Parameter()

    def output_folders(self):
        """Map the 'dict' alias to the graph dictionaries folder."""
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def input_folders(self):
        """Map the 'dict' alias to the graph dictionaries folder."""
        return {'dict': config.GRAPH_YT_DICTS_FOLDER}

    def requires(self):
        """List the upstream tasks, all parameterised with ``self.date``."""
        upstream = [
            IncrementalDayAndDumpMergeTask(self.date),  # incremental merge
            FullMonthYuidMergeTask(self.date),  # or from full month merge;
            ExpandPuidYuidMatching(self.date),  # some of them enriched from puid to yuid,
            EnrichSocialIdsWithPeopleSearch(self.date),
            SourceProfitTask(self.date),
        ]
        return upstream

    def output(self):
        """Return the 'graph_pairs_source_stat' target for ``self.date``."""
        target_path = config.LOCAL_OUTPUT_FOLDER + 'graph_pairs_source_stat'
        return yt_luigi.TodayFileTarget(target_path, self.date)

    def run(self):
        """Count rows of each 'yuid_with_id_*' table, upload them and mark the task done."""
        table_prefix = 'yuid_with_id_'
        # The folder path is listed without its last character
        # (presumably a trailing slash - confirm against out_f()).
        table_names = yt.list(self.out_f('dict')[:-1])
        row_counts = {}
        for table_name in table_names:
            if not table_name.startswith(table_prefix):
                continue

            # 'yuid_with_id_phone' -> id type 'phone', 'unknown' source;
            # 'yuid_with_id_phone_passport_dump' -> id type 'phone',
            # 'passport_dump' source.
            id_type, separator, remainder = table_name.replace(table_prefix, '').partition('_')
            source = remainder if separator else 'unknown'

            row_counts[source + '.' + id_type] = yt.row_count(self.out_f('dict') + table_name)

        graphite_sender.to_graphite_sender_batch('graph_pairs.absolute.sources', row_counts.iteritems(), self.date)

        yt_luigi.TodayFileTarget.done(config.LOCAL_OUTPUT_FOLDER + 'graph_pairs_source_stat', self.date)


# Ad-hoc entry point: when the module is executed as a script, build
# SourceProfitTask for a hard-coded date and call its run() directly.
# NOTE(review): run() is invoked on the task itself rather than through a
# luigi scheduling call, so presumably the tasks listed in requires() are not
# triggered from here - confirm before relying on this for anything but
# manual debugging.
if __name__ == '__main__':
    SourceProfitTask(date='2016-10-20').run()
