#!/usr/bin/python
# coding=utf-8
import itertools
import logging
import sys
from functools import partial

import luigi
import yt.wrapper as yt

from lib import graphite_sender
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils

# Module-level logger shared by the functions below. Note that it is named
# after this file's path (__file__), not after the dotted module name.
logger = logging.getLogger(__file__)


@yt.aggregator
class Qb2Mapper:
    """Mapper over the 'mobile-installs-cube' log that splits activations by date.

    The callable receives an iterable of input rows, parses each of them with
    QB2 and keeps only activations that
      * happened on one of ``dates``,
      * belong to project 'Mobile_Soft_Browser' or 'Search_Mobile_App',
      * were made on platform 'Android' or 'iOS'.

    Every kept row is emitted with ``@table_index`` set to the position of its
    activation date inside ``dates``, so the operation needs one output table
    per entry of ``dates``, in the same order (this is how ``do_run`` calls it).
    """

    def __init__(self, dates):
        # Ordered collection of dates; a date's position is its output table index.
        self.dates = dates

    def __call__(self, recs):
        # Imported inside the call rather than at module level - presumably
        # because qb2 is only needed where the mapper actually runs (TODO confirm).
        from qb2.api.v1 import QB2

        wanted_projects = ('Mobile_Soft_Browser', 'Search_Mobile_App')
        wanted_platforms = ('Android', 'iOS')
        parser = QB2(log='mobile-installs-cube',
                     fields=['activation_date', 'activation_version', 'device_id', 'platform', 'project'])

        for raw_row in recs:
            parsed = parser.process(parsed_log_line=raw_row)
            # Read every field up front, before any filtering takes place.
            when = parsed.activation_date
            version = parsed.activation_version
            device = parsed.device_id
            os_name = parsed.platform
            product = parsed.project

            if when not in self.dates:
                continue
            if product not in wanted_projects:
                continue
            if os_name not in wanted_platforms:
                continue

            out_row = {
                'mmetric_devid': device,
                'activation_date': when,
                'activation_version': version,
                'project': product,
                'platform': os_name,
                '@table_index': self.dates.index(when),
            }
            yield out_row


def reduce_no_limit(key, recs, own_browsers):
    """Annotate the install rows of one device with yuid / crypta / browser flags.

    Rows are told apart by their input ``@table_index`` (see the input table
    order used by ``do_run``):
      * 0 - device-to-yuid rows; their ``yuid_browser`` values are collected
        (an empty or missing value is recorded as 'unknown');
      * 1 - device-to-uuid rows; their presence sets ``has_crypta``;
      * 2 and above - install rows; each one is emitted with the flags added
        and ``@table_index`` shifted down by 2 so that it addresses the output
        table matching its input table.

    The flags of an install row describe what has been seen for the key so
    far, so the rows with index 0 and 1 are expected to arrive before the
    install rows of the same key (NOTE(review): confirm that the reduce
    operation guarantees this ordering).

    :param key: reduce key (unused here).
    :param recs: iterable of dict rows sharing the same key.
    :param own_browsers: mapping project -> name of that project's own browser;
        a ``KeyError`` is raised for a project missing from it when at least
        one browser is known for the device.
    :return: generator of the annotated install rows.
    """
    browsers = set()
    has_yuid = False
    has_crypta = False

    for rec in recs:
        table_index = rec['@table_index']

        if table_index == 0:
            has_yuid = True
            browsers.add(rec.get('yuid_browser', '') or 'unknown')
        elif table_index == 1:
            has_crypta = True
        else:
            rec['@table_index'] = table_index - 2

            # These three flags depend on the project of the current row, so
            # they are computed from scratch for every row. Keeping them across
            # rows made a device with installs of several projects inherit
            # flags from its earlier rows (and broke their mutual exclusivity).
            has_own = has_unknow = has_other = False
            if browsers:
                if own_browsers[rec['project']] in browsers:
                    has_own = True
                elif 'unknown' in browsers:
                    has_unknow = True
                else:
                    has_other = True

            # Always a plain bool, never the ``browsers`` set itself.
            has_browser = bool(has_yuid and browsers) and any(
                browser in config.DISTRIBUTION_BROWSERS_WHITE_LIST for browser in browsers)

            rec['has_crypta'] = has_crypta
            rec['has_yuid'] = has_yuid
            rec['has_own'] = has_own
            rec['has_unknow'] = has_unknow
            rec['has_other'] = has_other
            rec['has_browser'] = has_browser

            yield rec


def reduce_stat(key, recs, dates, stat_keys):
    """Count, for every activation date of one key, the rows having each flag set.

    ``recs`` are grouped by consecutive equal ``activation_date`` values, so
    rows of the same date must be adjacent. One summary row is yielded per
    group; it holds the fields of ``key``, a counter for every name in
    ``stat_keys`` (number of rows where that field is truthy), ``total`` (the
    number of rows in the group) and ``@table_index`` (the position of the
    group's date in ``dates``; ``dates.index`` raises if the date is absent).
    """
    def activation_date_of(row):
        return row['activation_date']

    for activation_date, same_date_rows in itertools.groupby(recs, activation_date_of):
        counters = dict.fromkeys(stat_keys, 0)
        row_count = 0
        for row in same_date_rows:
            row_count += 1
            for name in stat_keys:
                if row[name]:
                    counters[name] += 1

        summary = dict(key)
        summary.update(counters)
        summary['@table_index'] = dates.index(activation_date)
        summary['total'] = row_count
        yield summary


def do_run(date, in_cube_folder, in_dict_folder, out_graph_folder):
    """Build per-date distribution statistics from the install cube of ``date``.

    Steps:
      1. map the cube table ``in_cube_folder + date`` into one
         ``<out_graph_folder><d>/stat_distr/cube`` table per stored date;
      2. sort those tables and copies of the two device dictionaries from
         ``in_dict_folder`` by ``mmetric_devid``;
      3. reduce them together into ``cube_info`` tables (``reduce_no_limit``);
      4. sort the ``cube_info`` tables and reduce them into ``cube_stat``
         tables (``reduce_stat``);
      5. drop the temporary sorted dictionary copies.

    :param date: date of the cube table to process; also the last stored date.
    :param in_cube_folder: folder prefix of the install cube tables.
    :param in_dict_folder: folder prefix of the device dictionaries.
    :param out_graph_folder: folder prefix for the per-date output folders.
    """
    dates = mr.generate_dates_before(date, int(config.STORE_DAYS))
    distr_tables = [out_graph_folder + d + '/stat_distr/' + 'cube' for d in dates]
    for t in distr_tables:
        mr.mkdir(t.rsplit('/', 1)[0])

    yt.run_map(Qb2Mapper(dates), in_cube_folder + date, distr_tables)

    # Keep the dates and the tables filtered together: reduce_stat turns a date
    # into an output table index, so the list of dates it receives has to match
    # the list of tables that survived the existence check, position by position.
    existing = [(d, t) for d, t in zip(dates, distr_tables) if yt.exists(t)]
    stat_dates = [d for d, _ in existing]
    distr_tables = [t for _, t in existing]
    info_tables = [t + '_info' for t in distr_tables]
    stat_tables = [t + '_stat' for t in distr_tables]

    mr.sort_all(distr_tables, sort_by='mmetric_devid')

    dev_yuid_t = out_graph_folder + date + '/stat_distr/dev_yuid_indevice_perfect_no_limit'
    dev_uuid_t = out_graph_folder + date + '/stat_distr/dev_uuid_indevice_perfect_no_limit'

    # Both dictionary sorts are started asynchronously and awaited together.
    utils.wait_all([
        yt.run_sort(in_dict_folder + 'dev_yuid_indevice_perfect_no_limit', dev_yuid_t,
                    sort_by='mmetric_devid', sync=False),
        yt.run_sort(in_dict_folder + 'dev_uuid_indevice_perfect_no_limit', dev_uuid_t,
                    sort_by='mmetric_devid', sync=False),
    ])

    # process distribution tables
    # The two dictionaries come first, so the distribution tables start at
    # input table index 2 - reduce_no_limit relies on this order.
    yt.run_reduce(partial(reduce_no_limit,
                          own_browsers={'Mobile_Soft_Browser': 'yandexbrowser', 'Search_Mobile_App': 'yandexsearch'}),
                  [dev_yuid_t, dev_uuid_t] + distr_tables,
                  info_tables, reduce_by='mmetric_devid')

    mr.sort_all(info_tables, sort_by=['project', 'platform', 'activation_date'])
    yt.run_reduce(partial(reduce_stat, dates=stat_dates,
                          stat_keys=['has_yuid', 'has_crypta', 'has_own', 'has_unknow',
                                     'has_other', 'has_browser']),
                  info_tables, stat_tables,
                  sort_by=['project', 'platform', 'activation_date'], reduce_by=['project', 'platform'])

    mr.drop(dev_yuid_t)
    mr.drop(dev_uuid_t)


def do_graphite(date, out_graph_folder):
    """Send the counters of the existing ``cube_stat`` tables to graphite.

    For every stored date whose ``<out_graph_folder><d>/stat_distr/cube_stat``
    table exists, each row is sent as a batch of metrics grouped under
    ``<project>.<platform>`` (lower-cased) with the table's date. For the table
    of ``date`` itself the same values are additionally sent under names
    prefixed with ``snapshot_``.

    :param date: last stored date; its table also produces the snapshot metrics.
    :param out_graph_folder: folder prefix of the per-date output folders.
    """
    dates = mr.generate_dates_before(date, int(config.STORE_DAYS))
    candidates = [(d, out_graph_folder + d + '/stat_distr/cube_stat') for d in dates]
    # A real list (not a lazy filter object), so that it can be both logged and
    # iterated regardless of the Python version.
    date_tables = [(d, t) for d, t in candidates if yt.exists(t)]

    logger.info('Cube stat: %r', date_tables)

    # (graphite metric name, row field) pairs, in sending order. The row field
    # is spelled 'has_unknow' while the metric is published as 'has_unknown'.
    stat_fields = [
        ('total', 'total'),
        ('has_crypta', 'has_crypta'),
        ('has_yuid', 'has_yuid'),
        ('has_other', 'has_other'),
        ('has_browser', 'has_browser'),
        ('has_own', 'has_own'),
        ('has_unknown', 'has_unknow'),
    ]

    for d, t in sorted(date_tables):
        # make snapshot for last date; it won't be overwritten later
        prefixes = ['', 'snapshot_'] if d == date else ['']

        for rec in yt.read_table(t, raw=False):
            group = rec['project'].lower() + '.' + rec['platform'].lower()
            for pr in prefixes:
                metrics = [(group, pr + name, rec[field]) for name, field in stat_fields]
                graphite_sender.to_graphite_sender(metrics, d)


class GraphCubeStatTask(yt_luigi.BaseYtTask):
    """Luigi task that builds the cube distribution stats of ``date`` and sends them to graphite."""

    date = luigi.Parameter()

    def input_folders(self):
        """Return the input folders, addressed later through ``self.in_f(name)``."""
        folders = {}
        folders['cube'] = config.STATBOX_CUBE_INSTALL
        folders['dict'] = config.GRAPH_YT_DICTS_FOLDER
        return folders

    def output_folders(self):
        """Return the output folders, addressed later through ``self.out_f(name)``."""
        folders = {}
        folders['graph'] = config.YT_OUTPUT_FOLDER
        return folders

    def requires(self):
        """Depend on the device dictionaries update for the same date."""
        # The dependency is imported only when it is requested, not at module import time.
        from matching.device_matching.distr.device_yuid_distr_dicts import UpdateYuidDevidIndeviceAllDict
        dependency = UpdateYuidDevidIndeviceAllDict(self.date)
        return [dependency]

    def run(self):
        """Check that the cube table of ``date`` exists, then compute and publish the stats.

        Raises ``RuntimeError`` when the cube table is missing.
        """
        cube_folder = self.in_f('cube')
        cube_table = cube_folder + self.date
        if not yt.exists(cube_table):
            raise RuntimeError("Cube table %s doesn't exist" % cube_table)

        dict_folder = self.in_f('dict')
        graph_folder = self.out_f('graph')
        do_run(self.date, cube_folder, dict_folder, graph_folder)
        do_graphite(self.date, graph_folder)

    def output(self):
        """The task's target is the ``cube_stat`` table of ``date`` (it may be empty)."""
        stat_table = self.out_f('graph') + self.date + '/stat_distr/cube_stat'
        return [yt_luigi.YtTarget(stat_table, allow_empty=True)]


if __name__ == '__main__':
    # Stand-alone run: the date to process is the first command-line argument.
    yt.config.set_proxy(config.MR_SERVER)

    run_date = sys.argv[1]
    # Results go to the configured output folder; a different folder can be
    # substituted here instead, e.g.
    # '//home/crypta/team/rodion/CRYPTAIS-996-yabro/metrica/'.
    output_root = config.YT_OUTPUT_FOLDER
    do_run(run_date, config.STATBOX_CUBE_INSTALL, config.GRAPH_YT_DICTS_FOLDER, output_root)
    do_graphite(run_date, output_root)
