import itertools
import logging
from collections import defaultdict
from functools import partial

import luigi
import yt.wrapper as yt

import graph_quality_metrics
from data_imports.import_logs.watch_log import graph_ecommerce
from lib import graphite_sender
from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from matching.common_stats import graph_stat
from radius import radius_metrics
from rtcconf import config
from utils import mr_utils as mr
from utils import utils

# Upper bound on the number of records reduce_cid_ua_yuid will buffer for one
# crypta_id; groups with more records than this are skipped entirely
# (presumably to keep the reducer from running out of memory - confirm).
STAT_CID_SIZE_OOM = 10000


def reduce_compare_vertices(id_key, recs):
    """Reducer: classify a single vertex by comparing the new and old tables.

    Records are split with ``mr.split_left_right`` into those coming from the
    new vertices table and those coming from the old one (presumably by input
    table index, new table first - confirm against ``mr``).  Exactly one record
    is emitted per key, to output table 0, with an extra ``stability`` field:

    * ``stable``  - present in both tables with the same ``crypta_id``
    * ``changed`` - present in both tables with different ``crypta_id``;
      ``prev_crypta_id`` holds the ``crypta_id`` from the old table
    * ``del``     - present only in the old table (the old record is emitted)
    * ``new``     - present only in the new table

    :param id_key: reduce key (unused)
    :param recs: iterator over the records of one key
    """
    vertices_new, vertices_old = mr.split_left_right(recs, oom_check=False)

    prev_crypta_id = None
    if vertices_new and vertices_old:
        out_rec = vertices_new[0]
        old_crypta_id = vertices_old[0]['crypta_id']
        if out_rec['crypta_id'] == old_crypta_id:
            stability = 'stable'
        else:
            stability = 'changed'
            # The previous id must come from the OLD record; copying it from
            # the new record would just duplicate the current crypta_id.
            prev_crypta_id = old_crypta_id
    elif vertices_old:
        out_rec = vertices_old[0]
        stability = 'del'
    elif vertices_new:
        out_rec = vertices_new[0]
        stability = 'new'
    else:
        return

    out_rec['@table_index'] = 0
    out_rec['stability'] = stability
    if stability == 'changed':
        out_rec['prev_crypta_id'] = prev_crypta_id
    yield out_rec


def map_overlimit_vertices(rec, limit):
    """Mapper: pass through only records whose crypta_id_size exceeds ``limit``."""
    if not (rec['crypta_id_size'] > limit):
        return
    yield rec


def crypta_id_stats(crypta_id_key, recs):
    """Reducer: emit one record per crypta_id with its stats and quality metrics.

    The output record holds the ``crypta_id`` from the reduce key, updated
    first with the stats computed from ``recs`` and then with the quality
    metrics derived from those stats (later keys override earlier ones).
    """
    stats = graph_quality_metrics.crypta_id_stats(recs)
    quality = graph_quality_metrics.crypta_id_quality_metrics(stats)

    merged = {'crypta_id': crypta_id_key['crypta_id']}
    for part in (stats, quality):
        merged.update(part)

    yield merged


class VerticesStabilityStats(base_luigi_task.BaseTask):
    """
    Compares the new vertices table with the old one and sends to graphite how
    many vertices fall into each stability class assigned by
    reduce_compare_vertices.

    Produces two tables next to the new vertices table:
    ``<vertices>_<metric_name>`` (one classified record per vertex) and
    ``<vertices>_<metric_name>_stat`` (record counts per ``stability`` value).
    """
    vertices_config_old = luigi.Parameter()
    vertices_config_new = luigi.Parameter()
    metric_name = luigi.Parameter()

    def requires(self):
        return [self.vertices_config_new.producing_task]

    def _stability_table(self):
        """Path of the per-vertex stability table for this metric name."""
        return self.vertices_config_new.get_vertices_table() + '_' + self.metric_name

    def run(self):
        vertices_new = self.vertices_config_new.get_vertices_table()
        vertices_old = self.vertices_config_old.get_vertices_table()

        st_out_t = self._stability_table()

        if yt.exists(vertices_old):
            vertices_tables = [vertices_new, vertices_old]
            logging.info('Comparing %s and %s', vertices_new, vertices_old)
        else:
            # Nothing to compare with: only the new table is fed to the reducer.
            vertices_tables = [vertices_new]
            logging.info('Old vertices table %s does not exist, using only %s',
                         vertices_old, vertices_new)

        # Input tables are assumed to be already sorted by 'key'.
        yt.run_reduce(reduce_compare_vertices,
                      vertices_tables,
                      st_out_t,
                      reduce_by='key')

        mr.count_field_recs(st_out_t, st_out_t + '_stat', columns='stability')

        # send to graphite
        metric_prefix = 'vertices_%s_%s' % (self.metric_name, self.vertices_config_new.vertices_type)
        metrics = [(metric_prefix, r['stability'], r['count'])
                   for r in yt.read_table(st_out_t + '_stat', raw=False)]

        graphite_sender.to_graphite_sender(metrics, self.vertices_config_new.date)

    def output(self):
        # Must name the table run() actually writes.  The suffix was previously
        # hard-coded to '_stability_stat', which only matched when
        # metric_name == 'stability'.
        return yt_luigi.YtTarget(self._stability_table() + '_stat')


class VerticesCountAndSizeStats(base_luigi_task.BaseTask):
    """
    Sends to graphite the histogram of crypta_id sizes and the number of
    vertices per id type for one vertices table.

    The size histogram is built from the ``stat/crypta_ids_stats`` table, which
    this task does not create itself; ``stats_task`` is required alongside the
    vertices producing task (presumably it is the task that builds that table).
    """
    vertices_config = luigi.Parameter()
    stats_task = luigi.Parameter()

    def requires(self):
        return [self.vertices_config.producing_task, self.stats_task]

    def run(self):
        vertices_type = self.vertices_config.vertices_type
        vertices_pretty = self.vertices_config.get_vertices_table()
        vertices_stat_f = self.vertices_config.get_vertices_folder() + 'stat/'
        mr.mkdir(vertices_stat_f)

        all_metrics = (self.crypta_ids_size_hist(vertices_pretty, vertices_stat_f, vertices_type) +
                       self.vertices_counts(vertices_pretty, vertices_stat_f, vertices_type))

        graphite_sender.to_graphite_sender(all_metrics, self.vertices_config.date)

    def crypta_ids_size_hist(self, vertices_pretty, vertices_stat_f, vertices_type):
        """Build the crypta_id size histogram table and return its graphite metrics.

        The first ``config.FINAL_MERGE_LIMIT`` rows of the histogram (sorted by
        size) are reported individually; the counts of all remaining rows are
        summed into a single 'overlimit' metric.  ``vertices_pretty`` is unused.
        """
        stats_table = vertices_stat_f + 'crypta_ids_stats'
        hist_table = vertices_stat_f + 'crypta_ids_size_hist'

        # count crypta ids of each size
        mr.count_field_recs(stats_table, hist_table, columns='crypta_id_size')
        yt.run_sort(hist_table, sort_by='crypta_id_size')

        metric_name = 'vertices_hist_' + vertices_type
        rows = yt.read_table(hist_table, raw=False)

        metrics = [(metric_name, row['crypta_id_size'], row['count'])
                   for row in itertools.islice(rows, config.FINAL_MERGE_LIMIT)]

        # `rows` is consumed incrementally, so only the rows past the limit are left here
        overlimit_total = sum(row['count'] for row in rows)
        metrics.append((metric_name, 'overlimit', overlimit_total))

        metrics.append((metric_name, 'total', yt.row_count(stats_table)))

        return metrics

    def vertices_counts(self, vertices_pretty, vertices_stat_f, vertices_type):
        """Count vertices per id_type and return the metrics, including totals.

        'overlimit' is the number of vertices whose crypta_id_size is above
        ``config.FINAL_MERGE_LIMIT``; 'to_bb' is the remainder.
        """
        count_table = vertices_stat_f + 'vertices_count_stat'
        mr.count_field_recs(vertices_pretty, count_table, columns='id_type')

        metric_name = 'vertices_count_' + vertices_type
        metrics = [(metric_name, row['id_type'], row['count'])
                   for row in yt.read_table(count_table, raw=False)]

        # temporary table, dropped below once its row count is known
        overlimit_table = vertices_stat_f + 'vertices_count_overlimit'
        overlimit_mapper = partial(map_overlimit_vertices, limit=config.FINAL_MERGE_LIMIT)
        yt.run_map(overlimit_mapper, vertices_pretty, overlimit_table)

        total = yt.row_count(vertices_pretty)
        overlimit = yt.row_count(overlimit_table)

        for label, value in (('total', total),
                             ('overlimit', overlimit),
                             ('to_bb', total - overlimit)):
            metrics.append((metric_name, label, value))

        mr.drop(overlimit_table)

        return metrics

    def output(self):
        return [yt_luigi.YtTarget(self.vertices_config.get_vertices_folder() + table)
                for table in ('stat/crypta_ids_size_hist', 'stat/vertices_count_stat')]


def count_id_values_per_crypta_id(crypta_id_key, recs, id_types_idx):
    """Reducer: collect distinct id values of each known id type for one crypta_id.

    :param crypta_id_key: reduce key with ``crypta_id`` and ``crypta_id_size``
    :param recs: records of that crypta_id; only those whose ``id_type`` is a
        key of ``id_types_idx`` are taken into account
    :param id_types_idx: mapping id_type -> index of the output table
    :return: yields one record per id type seen, routed via ``@table_index``
        to the table of that id type, with the distinct values and their count
    """
    id_values = defaultdict(set)
    for r in recs:
        id_type = r.get('id_type')
        # Membership test on the dict itself: `in d.keys()` builds a list and
        # scans it for every record on Python 2.
        if id_type in id_types_idx:
            id_values[id_type].add(r['id_value'])

    # .items() rather than .iteritems() so the reducer runs on Python 2 and 3.
    for id_type, values in id_values.items():
        yield {'crypta_id': crypta_id_key['crypta_id'],
               'crypta_id_size': crypta_id_key['crypta_id_size'],
               'id_type': id_type,
               'values': list(values),
               'values_count': len(values),
               '@table_index': id_types_idx[id_type]}


def count_and_average(values_count_key, crypta_id_recs):
    """Reducer: count the records of one group and average their crypta_id_size.

    Yields a single record carrying ``values_count`` from the reduce key, the
    number of records in the group and the mean ``crypta_id_size`` as a float.
    """
    group_size = 0
    size_sum = 0

    for group_size, rec in enumerate(crypta_id_recs, 1):
        size_sum += rec['crypta_id_size']

    yield {'values_count': values_count_key['values_count'],
           'crypta_ids_count': group_size,
           'crypta_id_size_avg': size_sum / float(group_size)}


class IdValuesPerCryptaIdStat(base_luigi_task.BaseTask):
    """Calculates the number of id values per crypta_id

    For every id type in ``config.YUID_PAIR_TYPES_EXACT`` the task builds a
    ``<id_type>_stat`` table (number of crypta ids and their average size per
    number of distinct id values) and sends it to graphite.  Value counts of 5
    and above are reported as one aggregated ``5_or_more`` bucket.
    """
    vertices_config = luigi.Parameter()

    def requires(self):
        return [self.vertices_config.producing_task]

    def run(self):
        vertices_type = self.vertices_config.vertices_type
        edges_t = self.vertices_config.get_edges_table()
        workdir = self.vertices_config.get_vertices_folder() + 'stat/ids_per_crypta_id/'
        mr.mkdir(workdir)

        # One intermediate output table per id type; the reducer routes records
        # to them by table index.
        id_types_idx = dict()
        out_tables = []
        for idx, edge_type in enumerate(config.YUID_PAIR_TYPES_EXACT):
            id_types_idx[edge_type.id_type] = idx
            out_tables.append(workdir + edge_type.id_type)

        # assume sorted
        yt.run_reduce(partial(count_id_values_per_crypta_id, id_types_idx=id_types_idx),
                      edges_t,
                      out_tables,
                      reduce_by=['crypta_id', 'crypta_id_size'])

        mr.sort_all(out_tables, sort_by=['id_type', 'values_count'])

        ops = []
        for t in out_tables:
            ops.append(yt.run_reduce(count_and_average,
                                     t,
                                     t + '_stat',
                                     reduce_by=['id_type', 'values_count'],
                                     sync=False))
        utils.wait_all(ops)

        metrics = []
        for edge_type in config.YUID_PAIR_TYPES_EXACT:
            t = workdir + edge_type.id_type + '_stat'

            # Rows with values_count >= 5 all share the '5_or_more' metric
            # name, so they are aggregated into one datapoint instead of being
            # sent as several conflicting datapoints for the same metric.
            # bucket -> [crypta_ids_count, sum of crypta_id_size over crypta ids]
            buckets = {}
            bucket_order = []
            for r in yt.read_table(t):
                values_count = r['values_count']
                if values_count >= 5:
                    values_count = '5_or_more'

                if values_count not in buckets:
                    buckets[values_count] = [0, 0.0]
                    bucket_order.append(values_count)

                crypta_ids_count = r['crypta_ids_count']
                totals = buckets[values_count]
                totals[0] += crypta_ids_count
                # avg * count restores the total size, which keeps the merged
                # average weighted by the number of crypta ids
                totals[1] += r['crypta_id_size_avg'] * crypta_ids_count

            for values_count in bucket_order:
                crypta_ids_count, crypta_id_size_total = buckets[values_count]
                if not crypta_ids_count:
                    continue
                crypta_id_size_avg = crypta_id_size_total / crypta_ids_count

                metrics_name = 'ids_per_crypta_id.%s.%s.%s' % (vertices_type, edge_type.id_type, values_count)
                metrics.append((metrics_name, 'crypta_ids_count', crypta_ids_count))
                metrics.append((metrics_name, 'crypta_id_size_avg', crypta_id_size_avg))

        graphite_sender.to_graphite_sender(metrics, self.vertices_config.date)

        # only the '_stat' tables are kept as task output
        for t in out_tables:
            mr.drop(t)

    def output(self):
        workdir = self.vertices_config.get_vertices_folder() + 'stat/ids_per_crypta_id/'
        for edge_type in config.YUID_PAIR_TYPES_EXACT:
            yield yt_luigi.YtTarget(workdir + edge_type.id_type + '_stat', allow_empty=True)


class VerticesQualityMetrics(base_luigi_task.BaseTask):
    """
    Builds the per-crypta_id stats table (``stat/crypta_ids_stats``) and sends
    the average of every penalty metric to graphite: over all crypta ids,
    grouped by ``mobile_only`` and for three crypta_id size slices.
    """
    vertices_config = luigi.Parameter()

    def requires(self):
        return [self.vertices_config.producing_task]

    def get_metrics_to_calculate(self):
        """Names of the stats table columns that get averaged."""
        return [
            'browsers_penalty',
            'devices_penalty',
            'sex_penalty',
            'too_many_values_penalty',
            'overmatching_penalty',
            'regions_penalty',
        ]

    def to_graphite(self, metrics_params):
        """Read every averages table and send its rows to graphite.

        :param metrics_params: iterable of
            ``(table_name, penalty_metric, filter_by, group_by)``; ``filter_by``
            and ``group_by`` may be None, in which case 'all' is used in the
            metric name.
        """
        metrics = []
        for table_name, penalty_metric, filter_by, group_by in metrics_params:
            filter_name = filter_by or 'all'

            for row in yt.read_table(table_name, raw=False):
                group_by_name = row[group_by] if group_by else 'all'
                name_parts = (self.vertices_config.vertices_type,
                              penalty_metric,
                              filter_name,
                              group_by_name)
                metrics.append(('crypta_id_quality_metrics',
                                '%s.%s.%s.%s' % name_parts,
                                row['avg']))
        graphite_sender.to_graphite_sender(metrics, self.vertices_config.date)

    def run(self):
        vertices_pretty = self.vertices_config.get_vertices_table()
        vertices_folder = self.vertices_config.get_vertices_folder()
        mr.mkdir(vertices_folder + 'stat')

        stats_table = vertices_folder + 'stat/crypta_ids_stats'
        yt.run_map_reduce(None, crypta_id_stats,
                          vertices_pretty, stats_table,
                          reduce_by='crypta_id')
        yt.run_sort(stats_table, sort_by='crypta_id')

        # different groups of crypta_id sizes
        crypta_id_size_slices = {
            'less_5': lambda rec: rec['crypta_id_size'] < 5,
            '5_between_10': lambda rec: 5 <= rec['crypta_id_size'] < 10,
            'more_eq_10': lambda rec: rec['crypta_id_size'] >= 10,
        }

        metrics_params = []
        avg_ops = []
        for penalty_metric in self.get_metrics_to_calculate():
            # (output table, filter name, group-by column, extra avg_group_by kwargs)
            jobs = [
                # all crypta ids
                (vertices_folder + 'stat/crypta_ids_stats_%s_avg' % penalty_metric,
                 None, None, {}),
                # mobile vs not mobile
                (vertices_folder + 'stat/crypta_ids_stats_%s_%s_avg' % (penalty_metric, 'mobile_only'),
                 None, 'mobile_only', {'group_by': 'mobile_only'}),
            ]
            for slice_name, crypta_id_size_slice in crypta_id_size_slices.iteritems():
                jobs.append((vertices_folder + 'stat/crypta_ids_stats_%s_%s_avg' % (penalty_metric, slice_name),
                             slice_name, None, {'filter_rec': crypta_id_size_slice}))

            for out_t, filter_name, group_by, extra_kwargs in jobs:
                avg_ops.append(mr.avg_group_by(stats_table,
                                               out_t, column=penalty_metric,
                                               sync=True,
                                               **extra_kwargs))
                metrics_params.append((out_t, penalty_metric, filter_name, group_by))

        utils.wait_all(avg_ops)

        self.to_graphite(metrics_params)

    def output(self):
        # Only the stats table and the "all crypta ids" averages are declared as
        # targets; the grouped and sliced averages are not checked.
        vertices_folder = self.vertices_config.get_vertices_folder()
        metric_names = self.get_metrics_to_calculate()

        targets = [yt_luigi.YtTarget(self.vertices_config.get_vertices_folder() + 'stat/crypta_ids_stats')]
        for metric in metric_names:
            targets.append(yt_luigi.YtTarget(vertices_folder + 'stat/crypta_ids_stats_' + metric + '_avg'))
        return targets


def reduce_add_cid(key, recs):
    """Reducer: attach the crypta_id of a key to its desk / mob / app records.

    If at least one record of the group carries ``crypta_id``, the first record
    of each ``device_type`` ('desk', 'mob', 'app') gets that crypta_id (taken
    from the first record that has one) and is emitted to output table
    0, 1 or 2 respectively.  Groups without a crypta_id produce nothing.
    """
    rows = list(recs)

    with_cid = [row for row in rows if 'crypta_id' in row]
    if not with_cid:
        return
    crypta_id = with_cid[0]['crypta_id']

    # position in this tuple is the output table index
    for out_index, device_type in enumerate(('desk', 'mob', 'app')):
        first = next((row for row in rows if row.get('device_type') == device_type), None)
        if first is None:
            continue
        first['crypta_id'] = crypta_id
        first['@table_index'] = out_index
        yield first


def reduce_cid_ua(_, recs):
    """Reducer: emit (crypta_id, ua_profile) pairs for one key.

    * If the group has a record with ``subkey == 'yi'`` and a crypta_id record
      whose ``id_type`` starts with 'yuid', the ``ua_profile`` field extracted
      from the first 'yi' record's ``value`` is emitted with that crypta_id.
    * If the group has a crypta_id record whose ``id_type`` starts with
      'deviceid', ``ua_profile`` 'app' is emitted with that crypta_id.
    """
    rows = list(recs)

    yi_rec = None
    yuid_rec = None
    devid_rec = None
    for row in rows:
        if yi_rec is None and row.get('subkey') == 'yi':
            yi_rec = row
        if 'crypta_id' in row:
            id_type = row['id_type']
            is_yuid = id_type.startswith('yuid')
            is_devid = id_type.startswith('deviceid')
            if is_yuid and yuid_rec is None:
                yuid_rec = row
            if is_devid and devid_rec is None:
                devid_rec = row

    if yi_rec is not None and yuid_rec is not None:
        ua_profile = mr.get_field_value('ua_profile', yi_rec['value'])
        yield {'crypta_id': yuid_rec['crypta_id'], 'ua_profile': ua_profile}

    if devid_rec is not None:
        yield {'crypta_id': devid_rec['crypta_id'], 'ua_profile': 'app'}


def reduce_cid_ua_yuid(_, recs, ua_type):
    """Reducer: keep the ids of a crypta_id that also has a given UA type.

    Records from input table 0 carry ``ua_profile``; records from input table 1
    are the ids to filter.  When any table-0 record has a ``ua_profile``
    starting with ``ua_type``, every table-1 record is emitted to output
    table 0.  Groups with more than STAT_CID_SIZE_OOM records are skipped.
    """
    rows = list(itertools.islice(recs, STAT_CID_SIZE_OOM + 1))

    # one record over the limit is read only to detect an oversized group
    if len(rows) > STAT_CID_SIZE_OOM:
        return

    ua_rows = [row for row in rows if row['@table_index'] == 0]
    id_rows = [row for row in rows if row['@table_index'] == 1]

    if not (ua_rows and id_rows):
        return
    if not any(ua_row['ua_profile'].startswith(ua_type) for ua_row in ua_rows):
        return

    for id_row in id_rows:
        id_row['@table_index'] = 0
        yield id_row

def cross_device_stats_by_vertices(all_f, out_f, vertices):
    """Compute cross-device matching statistics for a vertices table.

    :param all_f: folder with the 'desk_yuids_rus', 'mob_yuids_rus' and
        'devices' tables
    :param out_f: folder all intermediate and result tables are written to
    :param vertices: path of the vertices table
    """
    usage_tables = [all_f + 'desk_yuids_rus', all_f + 'mob_yuids_rus', all_f + 'devices']
    cid_tables = [out_f + 'desk_yuids_rus_cid', out_f + 'mob_yuids_rus_cid', out_f + 'devices_cid']
    cid_ua_table = out_f + 'cid_ua'

    # Join cid
    mr.sort_all(usage_tables + [vertices], 'key')
    yt.run_reduce(reduce_add_cid,
                  usage_tables + [vertices],
                  list(cid_tables),
                  reduce_by='key')
    mr.sort_all(list(cid_tables), 'crypta_id')

    # Join ua to find cids containing specific UA type (desk/mob/device)
    yt.run_reduce(reduce_cid_ua,
                  [vertices, config.GRAPH_YT_DICTS_FOLDER + 'yuid_ua'],
                  [cid_ua_table],
                  reduce_by='key')
    yt.run_sort(cid_ua_table, sort_by='crypta_id')

    # Find cross-device: (ua_type prefix to look for, ids table, result table)
    cross_device_jobs = (
        ('d', 'mob_yuids_rus_cid', 'mob_yuids_with_desk'),
        ('m', 'desk_yuids_rus_cid', 'desk_yuids_with_mob'),
        ('m', 'devices_cid', 'devices_with_mob'),
        ('d', 'devices_cid', 'devices_with_desk'),
    )
    cross_device_ops = []
    for ua_type, ids_table, result_table in cross_device_jobs:
        cross_device_ops.append(
            yt.run_reduce(partial(reduce_cid_ua_yuid, ua_type=ua_type),
                          [cid_ua_table, out_f + ids_table],
                          [out_f + result_table],
                          reduce_by='crypta_id', sync=False))
    utils.wait_all(cross_device_ops)

    # traffic
    traffic_tables = ('mob_yuids_with_desk', 'desk_yuids_with_mob',
                      'devices_with_desk', 'devices_with_mob')
    traffic_ops = []
    for flatten in (True, False):
        for table in traffic_tables:
            traffic_ops.append(
                graph_stat.sum_by_sources(out_f, table, ['source_types'], flatten=flatten))
    utils.wait_all(traffic_ops)

    graph_stat.sum_sources_to_total_in_dir(out_f, ['source_types'])


def split_devices_indev(key, recs):
    """Reducer: split devices, and their in-device matches, into tables by OS.

    Records with ``crypta_id`` are treated as vertex records, all others as
    device records (the last one of each kind wins).  Nothing is emitted when
    there is no device record.  The OS index is 0 for 'ios', 1 for 'android'
    and 2 otherwise:

    * output tables 0-2: the key plus ``sources`` of the vertex record, only
      when those sources contain 'indev'
    * output tables 3-5: the key of every device
    """
    vertex_rec = None
    device_rec = None
    for rec in recs:
        if 'crypta_id' in rec:
            vertex_rec = rec
        else:
            device_rec = rec

    if not device_rec:
        return

    platform = device_rec.get('os', '')
    if platform == 'ios':
        out_index = 0
    elif platform == 'android':
        out_index = 1
    else:
        out_index = 2

    if vertex_rec and 'sources' in vertex_rec and 'indev' in vertex_rec['sources']:
        # split in-device matches into 3 tables by os
        yield {'key': key['key'], 'sources': vertex_rec['sources'], '@table_index': out_index}

    # split devices into 3 tables by os
    yield {'key': key['key'], '@table_index': out_index + 3}


def in_device_stats_by_browser_prepare_map(rec):
    """Mapper: extract (devid, browser, platform) from in-device d_y edges.

    A record is used only when its ``match_type`` contains 'indev', its
    ``pair_type`` is 'd_y' and both ``id2_browser`` and ``id1_ua`` are
    non-empty.  The browser is the first '|'-separated field of
    ``id2_browser`` and the platform is the fourth field of ``id1_ua``.

    Records with missing/None ``match_type`` or ``pair_type``, or with an
    ``id1_ua`` of fewer than four fields, are skipped rather than raising, so
    one malformed edge cannot fail the whole operation.
    """
    browser_ua = rec.get('id2_browser') or ''
    device_ua = rec.get('id1_ua') or ''
    match_type = rec.get('match_type') or ''

    if 'indev' not in match_type or rec.get('pair_type') != 'd_y':
        return
    if not browser_ua or not device_ua:
        return

    ua_fields = device_ua.split('|')  # m|phone|apple|ios|10.2.1
    if len(ua_fields) < 4:
        # no platform field in the user agent
        return

    yield dict(devid=rec['id1'],
               browser=browser_ua.split('|')[0],  # mobilesafari|10.0
               platform=ua_fields[3])


def in_device_stats_by_browser_prepare_reduce(key, recs):
    """Reducer: emit each distinct (browser, platform) pair of a device once.

    The device id from the reduce key is written to the ``key`` field.
    """
    distinct_pairs = {(rec['browser'], rec['platform']) for rec in recs}

    for browser, platform in distinct_pairs:
        yield {'key': key['devid'], 'browser': browser, 'platform': platform}


def in_device_stats_by_browser_filter_devices_map(rec):
    """Mapper: remember the input table index in ``jord`` and reset the index to 0."""
    rec.update({'jord': rec['@table_index'], '@table_index': 0})
    yield rec


def in_device_stats_by_browser_filter_devices_reduce(key, recs):
    """Reducer: keep browser records only for keys that have a device record.

    Records with ``jord == 0`` mark the device and are never emitted; every
    other record is emitted only if a device record was seen before it in the
    group.
    """
    device_seen = False
    for rec in recs:
        if rec['jord'] == 0:
            device_seen = True
        elif device_seen:
            yield rec


@yt.aggregator
def in_device_stats_by_browser_count_map(recs):
    """Aggregating mapper: pre-count records per (platform, browser).

    Receives an iterator over input records rather than a single record and
    emits one partial count per distinct pair seen in it.
    """
    per_platform = defaultdict(lambda: defaultdict(int))
    for rec in recs:
        per_platform[rec['platform']][rec['browser']] += 1

    for platform, per_browser in per_platform.items():
        for browser, seen in per_browser.items():
            yield {'browser': browser, 'platform': platform, 'count': seen}


def in_device_stats_by_browser_count_reduce(key, recs):
    """Reducer: sum the partial ``count`` values per (platform, browser)."""
    per_platform = {}
    for rec in recs:
        platform = rec['platform']
        browser = rec['browser']
        per_browser = per_platform.setdefault(platform, {})
        per_browser[browser] = per_browser.get(browser, 0) + rec['count']

    for platform, per_browser in per_platform.items():
        for browser, total in per_browser.items():
            yield {'browser': browser, 'platform': platform, 'count': total}


def in_device_stats_by_vertices(all_f, out_f, vertices, edges, date):
    """Compute in-device matching statistics and send them to graphite.

    :param all_f: folder containing the 'devices' table
    :param out_f: folder all intermediate and result tables are written to
    :param vertices: path of the vertices table
    :param edges: path of the edges table
    :param date: date passed to the graphite sender
    """
    indev_names = ['indev_ios', 'indev_android', 'indev_other']
    devices_names = ['dev_ios', 'dev_android', 'dev_other']
    split_names = indev_names + devices_names
    devices_table = all_f + 'devices'

    # devices and their in-device matches, split by os
    mr.sort_all([devices_table, vertices], 'key')
    yt.run_reduce(split_devices_indev,
                  [devices_table, vertices],
                  [out_f + name for name in split_names],
                  reduce_by='key')

    # distinct (browser, platform) pairs per device from in-device edges
    yt.run_map_reduce(in_device_stats_by_browser_prepare_map,
                      in_device_stats_by_browser_prepare_reduce,
                      edges, out_f + 'device_browsers',
                      reduce_by='devid')

    # keep only the pairs whose device is present in the devices table
    yt.run_map_reduce(in_device_stats_by_browser_filter_devices_map,
                      in_device_stats_by_browser_filter_devices_reduce,
                      [devices_table, out_f + 'device_browsers'],
                      out_f + 'device_browsers_filtered',
                      sort_by=['key', 'jord'], reduce_by='key')

    yt.run_map_reduce(in_device_stats_by_browser_count_map,
                      in_device_stats_by_browser_count_reduce,
                      out_f + 'device_browsers_filtered',
                      out_f + 'device_browsers_counts',
                      reduce_by=['platform', 'browser'])

    counts = {}
    for name in split_names:
        counts[name] = yt.row_count(out_f + name)
    counts['indev_all'] = sum(counts[name] for name in indev_names)
    counts['dev_all'] = sum(counts[name] for name in devices_names)

    metrics = []
    for name, value in counts.iteritems():
        metrics.append(('graph_stat_in_device', name, str(value)))
    graphite_sender.to_graphite_sender(metrics, date)

    def graphite_safe(name):
        # spaces and dots are replaced so they cannot split the metric path
        return name.replace(' ', '_').replace('.', '_')

    by_browser = []
    for row in yt.read_table(out_f + 'device_browsers_counts'):
        metric = '%s.%s' % (graphite_safe(row['platform']), graphite_safe(row['browser']))
        by_browser.append(('graph_stat_in_device', metric, row['count']))
    graphite_sender.to_graphite_sender(by_browser, date)


class VerticesMatchingCoverageStats(yt_luigi.BaseYtTask):
    """
    Calculates how many real yuids and devices are linked cross-device and
    in-device by the given vertices.
    """
    vertices_config = luigi.Parameter()

    def input_folders(self):
        total_stats = config.YT_OUTPUT_FOLDER + self.vertices_config.date + '/stat_new/all/'
        return {'total_stats': total_stats}

    def output_folders(self):
        # NOTE: the 'mathing_stats' spelling is the key used by run() and output()
        matching_stats = self.vertices_config.get_vertices_folder() + 'stat/matching/'
        return {'mathing_stats': matching_stats}

    def requires(self):
        total_usage_task = graph_stat.PrepareTodayTotalUsageStats(date=self.vertices_config.date)
        return [total_usage_task, self.vertices_config.producing_task]

    def run(self):
        total_stats_folder = self.in_f('total_stats')
        vertices_table = self.vertices_config.get_vertices_table()
        edges_table = self.vertices_config.get_edges_table()
        output_folder = self.out_f('mathing_stats')

        cross_device_folder = output_folder + 'cross_device/'
        in_device_folder = output_folder + 'in_device/'
        for folder in (output_folder, cross_device_folder, in_device_folder):
            mr.mkdir(folder)

        cross_device_stats_by_vertices(total_stats_folder, cross_device_folder, vertices_table)

        in_device_stats_by_vertices(total_stats_folder,
                                    in_device_folder,
                                    vertices_table,
                                    edges_table,
                                    self.vertices_config.date)

    def output(self):
        output_folder = self.out_f('mathing_stats')
        # TODO: add real number of tables
        checked_tables = ('cross_device/sum_by_source_types/mob_yuids_with_desk_count_total',
                          'cross_device/sum_by_source_types/desk_yuids_with_mob_count_total')
        return [yt_luigi.YtTarget(output_folder + table) for table in checked_tables]



class VerticesExperimentStatsTask(luigi.WrapperTask):
    """
    Aggregation of statistics tasks for a single vertices type. Can be used for
    ad-hoc experiment checking.
    """
    vertices_config = luigi.Parameter()

    def requires(self):
        # The quality metrics task is both required directly and handed to
        # VerticesCountAndSizeStats as its stats_task, so it is built once and
        # reused instead of being constructed a second time.
        qm_task = VerticesQualityMetrics(self.vertices_config)

        return [
            radius_metrics.RadiusMetricsForVertices(self.vertices_config),
            VerticesCountAndSizeStats(self.vertices_config, qm_task),
            graph_ecommerce.VerticesEcommerceStats(self.vertices_config),
            qm_task,
            VerticesMatchingCoverageStats(self.vertices_config)
        ]


class VerticesAllStatsTask(luigi.WrapperTask):
    """
    Aggregation of statistics tasks for all vertices that are currently in production
    """
    vertices_configs = luigi.Parameter()

    def all_stat_tasks(self, vertices_config):
        """Return the list of statistics tasks for one vertices config."""
        qm_task = VerticesQualityMetrics(vertices_config)

        tasks = [
            qm_task,
            radius_metrics.RadiusMetricsForVertices(vertices_config),
            VerticesCountAndSizeStats(vertices_config, qm_task),
            graph_ecommerce.VerticesEcommerceStats(vertices_config),
        ]

        # stability is only computed when there is a previous config to compare with
        prev_vertices_config = vertices_config.get_previous_vertices_config()
        if prev_vertices_config:
            tasks.append(VerticesStabilityStats(prev_vertices_config, vertices_config, 'stability'))

        if not vertices_config.vertices_only:
            # requires edges table
            tasks.append(IdValuesPerCryptaIdStat(vertices_config))

        return tasks

    def requires(self):
        return [task
                for vertices_config in self.vertices_configs
                for task in self.all_stat_tasks(vertices_config)]


if __name__ == '__main__':
    # Script entry point: only configures the global yt client; no task is
    # started from here.
    yt.config.set_proxy(config.MR_SERVER)
    # Exchange table data as YSON with table indexes processed.
    # NOTE(review): presumably needed because the map/reduce functions in this
    # module read and write the '@table_index' field - confirm.
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)







