import logging
from collections import defaultdict
from functools import partial

import luigi
import yt.wrapper as yt

from data_imports.import_logs import app_metrica_day
from lib import graphite_sender
from lib.luigi import yt_luigi
from matching.device_matching.app_metrica import app_metrica_to_old_formats
from matching.device_matching.perfect.config import merge_configs
from matching.device_matching.perfect.merge_month_perfect import DeviceYuidsMergeMonthTask
from rtcconf import config
from utils import mr_utils as mr
from utils import utils

# Set of api_key values stored as strings (the form in which api_key is handled in this module).
# NOTE(review): presumably these are the AppMetrica API keys that belong to Yandex-owned
# applications -- confirm with the owners. The constant is not referenced by the code
# visible in this module, so check external users before changing or removing it.
YANDEX_API_KEYS = {'1', '2', '3', '4', '13', '1111', '5683', '5727', '5745', '5746', '7633', '7999', '9847', '9850',
                   '9970', '9973', '10012', '10015', '10030', '10123', '10126', '10129', '10177', '10180', '10267',
                   '10270', '10318', '10321', '10324', '10327', '10606', '10609', '14833', '14836', '16807', '17569',
                   '17914', '18313', '18316', '18892', '18895', '19531', '19534', '19537', '20737', '20740', '21925',
                   '21952', '22540', '22675', '22678', '22729', '22792', '23101', '23104', '23107', '23548', '23851',
                   '24616', '24682', '24691', '25204', '25378', '25669', '25816', '25819', '25852', '25870', '26002',
                   '26005', '27232', '27862', '27952', '28210', '28554', '28587', '28620', '28623', '28932', '28935',
                   '29313', '29517', '29520', '29733', '29739', '29913', '30261', '30488', '30548', '30677', '30680',
                   '30683', '30686', '30689', '30692', '30695', '30698', '31476', '31479', '31647', '31650', '32811',
                   '33131', '33136', '33521', '34046', '34116', '34301', '37460', '37505', '37800', '37810', '38000',
                   '38985', '39730', '40704', '40708', '41199', '42949', '42984', '42989', '43139', '44039', '44194',
                   '44592', '44754', '44974', '58396', '58401', '60277', '61071', '62063', '73786', '86151', '86696',
                   '86701', '90874', '95594', '96735', '106400', '106450', '108945', '110340', '112204', '112209',
                   '112684', '115220', '115224', '119063', '119068', '122699', '130650', '130655', '132355', '132360',
                   '135760', '135855', '137670', '137715', '139260', '139265', '140590', '141215', '142195', '149390',
                   '160400', '162985', '178216', '178830', '182465', '185600', '208375', '209975', '216500', '218300',
                   '240290', '248740', '284530', '288312', '314999', '320379'}

def map_apps(rec):
    """Mapper: turn a raw uuid-info record into a flat per-uuid app row.

    The record key is used as the uuid; the remaining attributes are
    extracted from the packed ``rec['value']`` with ``mr.get_field_value``.

    Records that carry both an ``idfa`` and a ``google_adv_id`` are dropped
    (nothing is yielded for them). Otherwise a single row with the keys
    ``uuid``, ``devid``, ``app_id``, ``api_key`` and ``platform`` is yielded,
    where ``platform`` is taken from the ``os`` field.
    """
    uuid = rec['key']

    # Extract every field up front, in a fixed order.
    field_names = ('devid', 'app_id', 'api_key', 'idfa', 'google_adv_id', 'os')
    fields = {name: mr.get_field_value(name, rec['value']) for name in field_names}

    # A device cannot be Android and iOS at the same time: skip such records.
    if fields['idfa'] and fields['google_adv_id']:
        return

    yield {
        'uuid': uuid,
        'devid': fields['devid'],
        'app_id': fields['app_id'],
        'api_key': fields['api_key'],
        'platform': fields['os'],
    }


def calculate_app_stats(mobile_day_f, in_dict_f, out_dict_f, workdir, date):
    """Build per-app day and month usage statistics tables.

    Steps:
      1. Run ``map_apps`` over ``<mobile_day_f>uuid_info`` (written to
         ``<workdir>apps_day_tmp``) and over ``<in_dict_f>uuid_dev_info``
         (written to ``<out_dict_f>apps_month``), then sort both by ``uuid``.
      2. Join the ``platform`` column from the month table onto the day table,
         producing ``<workdir>apps_day`` and
         ``<workdir>uuid_day_month_not_matched``.
      3. Count records per ``(app_id, api_key, platform)`` into
         ``<workdir>apps_day_stat`` and ``<workdir>apps_month_stat`` and sort
         both by ``desc``.
      4. Drop the temporary ``<workdir>apps_day_tmp`` table.

    :param mobile_day_f: folder prefix holding the daily ``uuid_info`` table
    :param in_dict_f: folder prefix holding the ``uuid_dev_info`` dictionary
    :param out_dict_f: folder prefix that receives the ``apps_month`` table
    :param workdir: folder prefix for intermediate and result tables
    :param date: not used by this function; kept so existing callers keep working

    All folder arguments are concatenated with table names directly, so they
    are expected to end with a path separator.
    """
    # map apps
    utils.wait_all([
        yt.run_map(map_apps,
                   mobile_day_f + 'uuid_info',
                   workdir + 'apps_day_tmp', sync=False),
        yt.run_map(map_apps,
                   in_dict_f + 'uuid_dev_info',
                   out_dict_f + 'apps_month', sync=False),
    ])

    mr.sort_all([
        workdir + 'apps_day_tmp',
        out_dict_f + 'apps_month',
    ], sort_by='uuid')

    # TODO: this is stupid hack because os is not stored for day tables and we need to join it from month
    # NOTE(review): presumably the first output receives day rows found in the
    # month table and the second one the rows that were not -- confirm against
    # mr.filter_left_by_right.
    # The stray trailing comma that used to follow this call (turning the
    # statement into a throw-away 1-tuple) has been removed.
    yt.run_reduce(partial(mr.filter_left_by_right,
                          keep_not_found=True,
                          right_columns_to_join=['platform']),
                  [workdir + 'apps_day_tmp', out_dict_f + 'apps_month'],
                  [workdir + 'apps_day', workdir + 'uuid_day_month_not_matched'],
                  reduce_by='uuid')

    mr.merge_chunks_all([
        workdir + 'apps_day', workdir + 'uuid_day_month_not_matched'
    ])

    # count stats
    utils.wait_all([
        mr.count_field_recs(workdir + 'apps_day',
                            workdir + 'apps_day_stat',
                            columns=['app_id', 'api_key', 'platform'],
                            add_desc_count=True, sync=False),
        mr.count_field_recs(out_dict_f + 'apps_month',
                            workdir + 'apps_month_stat',
                            columns=['app_id', 'api_key', 'platform'],
                            add_desc_count=True, sync=False)
    ])
    mr.sort_all([
        workdir + 'apps_day_stat',
        workdir + 'apps_month_stat',
    ], sort_by='desc')

    # The unjoined day table is only an intermediate result.
    mr.drop(workdir + 'apps_day_tmp')


def reduce_by_uuid(uuid_key, recs):
    """Reducer: count, for one uuid, how many records each source type has.

    Yields a single row ``{'uuid': ..., 'sources': {source_type: count}}``
    where ``sources`` is converted with ``utils.default_to_regular``.
    """
    records_per_source = defaultdict(int)
    for row in recs:
        source = row['source_type']
        records_per_source[source] += 1

    plain_sources = utils.default_to_regular(records_per_source)
    yield dict(uuid=uuid_key['uuid'], sources=plain_sources)


def count_apps_by_sources(app_key, recs, sources_index):
    """Reducer: aggregate uuid/yuid counts per source for one application.

    :param app_key: reduce key with ``app_id``, ``api_key`` and ``platform``
    :param recs: rows of this application; each row stands for one uuid and
        carries ``sources`` -- a mapping ``source_type -> yuids count``
    :param sources_index: mapping ``source_type -> output table index``; the
        ``total`` row goes to index ``len(sources_index)``, i.e. the table
        right after all per-source tables

    Yields the ``total`` row first and then one row per source type seen.
    Every row has ``uuids_count``, ``yuids_count``, ``desc`` (negated
    ``uuids_count``, used for descending sort) and ``@table_index``.

    Uses ``dict.items()`` rather than the Python-2-only ``iteritems()`` so the
    reducer behaves the same on Python 2 and Python 3.
    """
    app_id = app_key['app_id']
    api_key = app_key['api_key']
    platform = app_key['platform']

    uuids_count_by_source = defaultdict(int)
    yuids_count_by_source = defaultdict(int)
    total_uuids_count = 0
    total_yuids_count = 0
    for r in recs:
        total_uuids_count += 1
        for s, yuids_count in r['sources'].items():
            uuids_count_by_source[s] += 1

            yuids_count_by_source[s] += yuids_count
            total_yuids_count += yuids_count  # TODO: this is wrong, because sources can intersect

    yield {'app_id': app_id, 'api_key': api_key,
           'platform': platform, 'source_type': 'total',
           'uuids_count': total_uuids_count, 'yuids_count': total_yuids_count, 'desc': -total_uuids_count,
           '@table_index': len(sources_index)}

    for source_type, uuids_count in uuids_count_by_source.items():
        # Both counters are always updated together, so the key is present.
        yuids_count = yuids_count_by_source[source_type]
        table_index = sources_index[source_type]

        yield {'app_id': app_id, 'api_key': api_key,
               'platform': platform, 'source_type': source_type,
               'uuids_count': uuids_count, 'yuids_count': yuids_count, 'desc': -uuids_count,
               '@table_index': table_index}


def expand_sources(rec):
    """Mapper: emit one copy of ``rec`` per source type plus a ``total`` copy.

    The first yielded row has ``source_type == 'total'``; it is followed by
    one row for every key of ``rec['sources']`` with ``source_type`` set to
    that key.

    Each yielded row is an independent shallow copy. Re-yielding the same
    mutated dict would make every collected row show the last ``source_type``.
    The input record itself is left untouched.
    """
    yield dict(rec, source_type='total')
    for source in rec['sources']:
        yield dict(rec, source_type=source)


def calculate_matching_stats(indevice_day_f, indevice_month_f, uuid_app_dict, workdir):
    """Compute per-app matching statistics for the day and month periods.

    For every entry of ``merge_configs`` that has a uuid on either side, the
    table ``<id1>_<id2>_<source_type>`` is read from both in-device folders.
    The pipeline, run for day and month in parallel, is:

      1. ``reduce_by_uuid`` -> ``<period>/uuid_tmp`` (sources per uuid),
         sorted by ``uuid``;
      2. join ``app_id``, ``api_key`` and ``platform`` from ``uuid_app_dict``
         -> ``<period>/apps`` and ``<period>/apps_not_found``;
      3. ``count_apps_by_sources`` -> one ``<period>/app_stats_<source_type>``
         table per source plus ``<period>/app_stats_total``;
      4. for the month only, count the ``apps_not_found`` rows per source type
         into ``month/app_stats_not_found_devid``;
      5. sort every stats table by ``desc``.

    :param indevice_day_f: folder prefix with daily uuid matching tables
    :param indevice_month_f: folder prefix with monthly uuid matching tables
    :param uuid_app_dict: path of the uuid -> app dictionary table
    :param workdir: folder prefix for results; ``day/`` and ``month/``
        sub-folders are created inside it
    """
    workdir_day = workdir + 'day/'
    mr.mkdir(workdir_day)
    workdir_month = workdir + 'month/'
    mr.mkdir(workdir_month)

    uuid_matching_tables_day = []
    uuid_matching_tables_month = []
    app_stats_tables_day = []
    app_stats_tables_month = []
    # source_type -> index of its output table in the app_stats_tables_* lists
    out_indexes = dict()

    index = 0
    for m in merge_configs:
        if m.id1 == config.ID_TYPE_UUID or m.id2 == config.ID_TYPE_UUID:
            uuid_matching_tables_day.append(indevice_day_f + m.id1 + '_' + m.id2 + '_' + m.source_type)
            uuid_matching_tables_month.append(indevice_month_f + m.id1 + '_' + m.id2 + '_' + m.source_type)
            app_stats_tables_day.append(workdir_day + 'app_stats_' + m.source_type)
            app_stats_tables_month.append(workdir_month + 'app_stats_' + m.source_type)
            out_indexes[m.source_type] = index
            index += 1

    utils.wait_all([
        yt.run_map_reduce(None, reduce_by_uuid,
                          uuid_matching_tables_day,
                          workdir_day + 'uuid_tmp',
                          reduce_by='uuid', sync=False),
        yt.run_map_reduce(None, reduce_by_uuid,
                          uuid_matching_tables_month,
                          workdir_month + 'uuid_tmp',
                          reduce_by='uuid', sync=False),
    ])

    mr.sort_all([
        workdir_day + 'uuid_tmp',
        workdir_month + 'uuid_tmp'
    ], sort_by='uuid')

    utils.wait_all([
        yt.run_reduce(partial(mr.filter_left_by_right,
                              keep_not_found=True,
                              right_columns_to_join=['app_id', 'api_key', 'platform']),
                      [workdir_day + 'uuid_tmp', uuid_app_dict],
                      [workdir_day + 'apps', workdir_day + 'apps_not_found'],
                      reduce_by='uuid', sync=False),
        yt.run_reduce(partial(mr.filter_left_by_right,
                              keep_not_found=True,
                              right_columns_to_join=['app_id', 'api_key', 'platform']),
                      [workdir_month + 'uuid_tmp', uuid_app_dict],
                      [workdir_month + 'apps', workdir_month + 'apps_not_found'],
                      reduce_by='uuid', sync=False)
    ])

    mr.merge_chunks_all([
        workdir_day + 'apps', workdir_day + 'apps_not_found',
        workdir_month + 'apps', workdir_month + 'apps_not_found'
    ])

    # The "total" table is listed last, matching the len(sources_index) table
    # index that count_apps_by_sources assigns to the total row.
    app_stats_total_day = workdir_day + 'app_stats_total'
    app_stats_total_month = workdir_month + 'app_stats_total'
    utils.wait_all([
        yt.run_map_reduce(None, partial(count_apps_by_sources, sources_index=out_indexes),
                          workdir_day + 'apps',
                          app_stats_tables_day + [app_stats_total_day],
                          reduce_by=['app_id', 'api_key', 'platform'],
                          sync=False),
        yt.run_map_reduce(None, partial(count_apps_by_sources, sources_index=out_indexes),
                          workdir_month + 'apps',
                          app_stats_tables_month + [app_stats_total_month],
                          reduce_by=['app_id', 'api_key', 'platform'],
                          sync=False),
        # sync=False like its siblings: without it this operation ran
        # synchronously while the list was being built instead of being
        # awaited together with the other two.
        yt.run_map_reduce(expand_sources, partial(mr.count_by_column, column='source_type', add_desc_count=True),
                          workdir_month + 'apps_not_found',
                          workdir_month + 'app_stats_not_found_devid',
                          reduce_by='source_type',
                          sync=False)
    ])

    mr.sort_all(app_stats_tables_day + app_stats_tables_month +
                [app_stats_total_day, app_stats_total_month, workdir_month + 'app_stats_not_found_devid'],
                sort_by='desc')


class AppStat(object):
    """Accumulated usage and matching counters of one (app_id, platform) pair.

    Instances compare equal and hash by ``(app_id, platform)`` only; the
    counters are not part of the identity.
    """

    def __init__(self, app_id, platform):
        self.app_id = app_id
        self.platform = platform
        # api_key -> accumulated count (filled in by the code that loads stats)
        self.api_keys = defaultdict(int)
        # daily / monthly totals, incremented by the caller
        self.dau = 0
        self.mau = 0
        # every source seen by either add_*_source_matching method
        self.sources = set()
        # source -> accumulated matched count
        self.daily_source_matching = defaultdict(int)
        self.monthly_source_matching = defaultdict(int)

    def add_daily_source_matching(self, source, count):
        """Add ``count`` to the daily matched counter of ``source``."""
        self.daily_source_matching[source] += count
        self.sources.add(source)

    def add_montly_source_matching(self, source, count):
        """Add ``count`` to the monthly matched counter of ``source``."""
        self.monthly_source_matching[source] += count
        self.sources.add(source)

    def get_daily_matching_percents(self, source):
        """Return daily matched count of ``source`` divided by ``dau``.

        The result is a float ratio (not multiplied by 100). ``None`` is
        returned when ``dau`` is zero or the source has no daily counter.
        """
        if self.dau and source in self.daily_source_matching:
            return self.daily_source_matching[source] / float(self.dau)
        return None

    def get_montly_matching_percent(self, source):
        """Return monthly matched count of ``source`` divided by ``mau``.

        The result is a float ratio (not multiplied by 100). ``None`` is
        returned when ``mau`` is zero or the source has no monthly counter.
        """
        if self.mau and source in self.monthly_source_matching:
            return self.monthly_source_matching[source] / float(self.mau)
        return None

    def app_key(self):
        """Return the identity tuple ``(app_id, platform)``."""
        return self.app_id, self.platform

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # (including None) instead of failing on a missing app_key().
        if not isinstance(other, AppStat):
            return NotImplemented
        return self.app_key() == other.app_key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.app_key())

    def __str__(self):
        # Sources are sorted so the report does not depend on set ordering.
        ordered_sources = sorted(self.sources)

        info = '%s(%s, api_keys=%s)\n' % (self.app_id, self.platform, sorted(self.api_keys.items()))
        info += ('mau=%s, dau=%s\n' % (self.mau, self.dau))
        info += ('sources=%s\n' % ','.join(ordered_sources))
        for s in ordered_sources:
            x1 = self.daily_source_matching.get(s, '')
            x2 = self.get_daily_matching_percents(s) or ''
            x3 = self.monthly_source_matching.get(s, '')
            x4 = self.get_montly_matching_percent(s) or ''
            info += '\t%s: daily_count=%s daily_percent=%s monthly_count=%s monthly_percent=%s\n' % (s, x1, x2, x3, x4)

        return info


def get_app_metrics(matching_stats_f, metrika_stats_f):
    """Load usage and matching statistics into ``AppStat`` objects.

    :param matching_stats_f: folder prefix that contains the
        ``day/app_stats_<source_type>`` and ``month/app_stats_<source_type>``
        tables
    :param metrika_stats_f: folder prefix that contains the ``apps_day_stat``
        and ``apps_month_stat`` tables
    :return: dict mapping ``(app_id, platform)`` to its ``AppStat``
    """
    stats_per_app = {}

    def add_or_get_app_stat(rec):
        # Look up the AppStat of the record's app, creating it on first sight.
        key = (rec['app_id'], rec['platform'])
        found = stats_per_app.get(key)
        if found is None:
            found = AppStat(rec['app_id'], rec['platform'])
            stats_per_app[key] = found
        return found

    # mob_metrika_stats: daily counts feed dau ...
    for row in yt.read_table(metrika_stats_f + 'apps_day_stat'):
        add_or_get_app_stat(row).dau += row['count']

    # ... monthly counts feed mau and the per-api_key breakdown
    for row in yt.read_table(metrika_stats_f + 'apps_month_stat'):
        month_stat = add_or_get_app_stat(row)
        month_stat.mau += row['count']
        month_stat.api_keys[row['api_key']] += row['count']  # count api_key stats by mau

    # matching stats: every uuid-related source type plus the synthetic 'total'
    source_types = [cfg.source_type for cfg in merge_configs
                    if cfg.id1 == config.ID_TYPE_UUID or cfg.id2 == config.ID_TYPE_UUID] + ['total']

    for source_type in source_types:
        print(source_type)
        table_suffix = 'app_stats_' + source_type

        for row in yt.read_table(matching_stats_f + 'day/' + table_suffix):
            add_or_get_app_stat(row).add_daily_source_matching(source_type, row['uuids_count'])

        for row in yt.read_table(matching_stats_f + 'month/' + table_suffix):
            add_or_get_app_stat(row).add_montly_source_matching(source_type, row['uuids_count'])

    return stats_per_app


class MobileMetrikaAppsStats(yt_luigi.BaseYtTask):
    """Luigi task that builds daily/monthly per-app usage statistics.

    Runs ``calculate_app_stats`` for ``date`` and stamps the produced
    ``apps_month`` dictionary table with that date.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Folders read by the task, keyed by the alias used with ``in_f``."""
        return {
            'mobile_day': config.YT_OUTPUT_FOLDER + self.date + '/mobile/',
            'dict': config.GRAPH_YT_DICTS_FOLDER
        }

    def output_folders(self):
        """Folders written by the task, keyed by the alias used with ``out_f``."""
        return {
            'mm_stats': config.YT_OUTPUT_FOLDER + self.date + '/mobile/app_stats/',
            'dict': config.GRAPH_YT_DICTS_FOLDER
        }

    def requires(self):
        """The AppMetrica day import and both old-format conversions."""
        return [app_metrica_day.ImportAppMetrikaDayTask(date=self.date, run_date=self.date),
                app_metrica_to_old_formats.ConvertAppMetricaDictsToOldFormats(self.date),
                app_metrica_to_old_formats.ConvertAppMetricaDayToOldFormats(self.date)
                ]

    def run(self):
        workdir = self.out_f('mm_stats')
        mr.mkdir(workdir)

        calculate_app_stats(self.in_f('mobile_day'),
                            self.in_f('dict'), self.out_f('dict'),
                            workdir, self.date)

        # apps_month is written to the output 'dict' folder and output()
        # checks its date there, so stamp it through the same alias. Both
        # 'dict' aliases currently name the same folder; using out_f keeps
        # run() and output() in agreement if they ever diverge.
        mr.set_generate_date(self.out_f('dict') + 'apps_month', self.date)

    def output(self):
        """Both stats tables plus the date-stamped ``apps_month`` dictionary."""
        workdir = self.out_f('mm_stats')
        return [yt_luigi.YtTarget(workdir + 'apps_day_stat'),
                yt_luigi.YtTarget(workdir + 'apps_month_stat'),
                yt_luigi.YtDateTarget(self.out_f('dict') + 'apps_month', self.date)]


class AppsMatchingStats(yt_luigi.BaseYtTask):
    """Luigi task that computes per-app uuid matching statistics for ``date``."""
    date = luigi.Parameter()

    def input_folders(self):
        """Folders read by the task, keyed by the alias used with ``in_f``."""
        perfect_f = config.INDEVICE_YT_FOLDER + self.date + '/perfect/'
        return {
            # TODO: devid_raw_day and devid_raw_month are BROKEN!!
            'indevice_day': perfect_f + 'devid_raw_day/',
            'indevice_month': perfect_f + 'devid_raw_month/',
            'dict': config.GRAPH_YT_DICTS_FOLDER  # uuid info
        }

    def output_folders(self):
        """Folders written by the task, keyed by the alias used with ``out_f``."""
        return {
            'app_matching_stats': config.INDEVICE_YT_FOLDER + self.date + '/perfect/app_stats/',
        }

    def requires(self):
        """App usage stats, both old-format conversions and the month merge."""
        dependencies = [
            MobileMetrikaAppsStats(self.date),
            app_metrica_to_old_formats.ConvertAppMetricaDictsToOldFormats(self.date),
            app_metrica_to_old_formats.ConvertAppMetricaDayToOldFormats(self.date),
            DeviceYuidsMergeMonthTask(self.date),
        ]
        return dependencies

    def run(self):
        stats_dir = self.out_f('app_matching_stats')
        mr.mkdir(stats_dir)

        # The uuid -> app dictionary lives in the 'dict' folder.
        uuid_app_dict = self.in_f('dict') + 'apps_month'
        calculate_matching_stats(self.in_f('indevice_day'),
                                 self.in_f('indevice_month'),
                                 uuid_app_dict,
                                 stats_dir)

    def output(self):
        """Yield a day and a month stats target for every uuid-related source."""
        stats_dir = self.out_f('app_matching_stats')
        uuid_configs = (cfg for cfg in merge_configs
                        if cfg.id1 == config.ID_TYPE_UUID or cfg.id2 == config.ID_TYPE_UUID)
        for cfg in uuid_configs:
            for period in ('day', 'month'):
                yield yt_luigi.YtTarget(stats_dir + period + '/app_stats_' + cfg.source_type)


class AppStatsToGraphite(yt_luigi.PostGraphTask):
    """Post-graph task that sends per-app usage/matching metrics to Graphite.

    Only applications whose ``app_id`` contains ``'yandex'`` are reported.
    """

    def __init__(self, date, name):
        super(AppStatsToGraphite, self).__init__(date=date, name=name)

    def requires(self):
        """Both statistics tasks for the same date."""
        return [MobileMetrikaAppsStats(self.date),
                AppsMatchingStats(self.date)]

    def run_post_graph(self):
        """Collect the metrics from the stats tables and send them in one batch.

        Each metric is a ``(metric_name, key, value)`` tuple, where ``key`` is
        ``<app_id with dots replaced by underscores>.<lower-cased platform>``
        for app metrics and the source type for ``not_found_devid``.
        """
        metrika_stats_f = config.YT_OUTPUT_FOLDER + self.date + '/mobile/app_stats/'
        matching_stats_f = config.INDEVICE_YT_FOLDER + self.date + '/perfect/app_stats/'

        metrics = []
        apps = set()
        for app_stat in get_app_metrics(matching_stats_f, metrika_stats_f).values():
            # app_id may be empty/None (platform is already treated as
            # optional below); such apps cannot match the filter, so skip
            # them instead of failing on the `in` test.
            if not app_stat.app_id or 'yandex' not in app_stat.app_id:
                continue

            apps.add(app_stat.app_id)
            platform = app_stat.platform.lower() if app_stat.platform else 'other'
            app_key_normed = app_stat.app_id.replace('.', '_') + '.' + platform

            metrics.append(('app_stats.dau', app_key_normed, app_stat.dau))
            metrics.append(('app_stats.mau', app_key_normed, app_stat.mau))

            for s in app_stat.sources:
                # Missing counters and undefined ratios are reported as 0.
                daily_matching_count = app_stat.daily_source_matching.get(s, 0)
                daily_matching_percent = app_stat.get_daily_matching_percents(s) or 0
                monthly_matching_count = app_stat.monthly_source_matching.get(s, 0)
                monthly_matching_percent = app_stat.get_montly_matching_percent(s) or 0
                metrics.append(('app_stats.daily_matching_count.%s' % s, app_key_normed, daily_matching_count))
                metrics.append(('app_stats.daily_matching_percent.%s' % s, app_key_normed, daily_matching_percent))
                metrics.append(('app_stats.monthly_matching_count.%s' % s, app_key_normed, monthly_matching_count))
                metrics.append(('app_stats.monthly_matching_percent.%s' % s, app_key_normed, monthly_matching_percent))

        for r in yt.read_table(matching_stats_f + 'month/app_stats_not_found_devid'):
            metrics.append(('app_stats.not_found_devid', r['source_type'], r['count']))

        # Lazy %-arguments: the message is only formatted if it is emitted.
        logging.info('Sending app stats for the following %d apps:\n %s', len(apps), list(apps))
        graphite_sender.to_graphite_sender(metrics, self.date)


if __name__ == '__main__':
    # Manual run: point the YT client at the configured server, then build the
    # Graphite export task (and its dependencies) for a hard-coded date.
    yt.config.set_proxy(config.MR_SERVER)

    graphite_task = AppStatsToGraphite('2016-11-13', 'sds')
    luigi.build([graphite_task], workers=1, scheduler_port=8083)
