import os
from functools import partial

import luigi
import yt.wrapper as yt

from graph_webvisor import ImportWebvisorFromStbxTask
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils


def prepare_webvisor_for_stat(key, recs, domain_or_url):
    """Reducer: summarise one (yuid, domain/url, is_login, id_type) group.

    Counts the distinct ``id_value`` entries seen in the group and emits a
    single row routed to one of six output tables:

        0 - email, logged in, fewer than 3 distinct ids
        1 - email, logged in, 3 or more distinct ids
        2 - email, not logged in
        3 - other id type, logged in, fewer than 3 distinct ids
        4 - other id type, logged in, 3 or more distinct ids
        5 - other id type, not logged in

    :param key: reduce key; must contain 'yuid', 'is_login', 'id_type' and
        the column named by ``domain_or_url``.
    :param recs: iterable of records of the group, each with 'id_value'.
    :param domain_or_url: name of the grouping column (the caller in this
        file passes 'domain' or 'url').
    """
    yuid = key['yuid']
    group_value = key[domain_or_url]
    login_flag = key['is_login']
    id_kind = key['id_type']

    distinct_ids = {rec['id_value'] for rec in recs}
    distinct_count = len(distinct_ids)

    # Email tables occupy indices 0..2, every other id type uses 3..5.
    base = 0 if id_kind == 'email' else 3

    if not utils.is_true(login_flag):
        offset = 2
    elif distinct_count < 3:
        offset = 0
    else:
        offset = 1

    yield {'yuid': yuid, domain_or_url: group_value,
           'ids_count': distinct_count,
           '@table_index': base + offset}


def count_date_stats(key, recs):
    """Reducer: count the records of one (domain, is_birth_date) group.

    Emits a single ``{'domain', 'count'}`` row. Groups whose
    ``is_birth_date`` flag is true go to output table 0, all others to
    output table 1.

    :param key: reduce key with 'domain' and 'is_birth_date'.
    :param recs: iterable of the records belonging to the group.
    """
    domain = key['domain']
    birth_flag = key['is_birth_date']

    # Only the number of records matters, their contents are ignored.
    total = sum(1 for _ in recs)

    table_index = 0 if utils.is_true(birth_flag) else 1

    yield {'domain': domain,
           '@table_index': table_index,
           'count': total}


def mk_large_keys_filter(large_key_size):
    """Build a mapper that splits records by the length of 'url' and 'domain'.

    Every input record is emitted twice, once per checked field:

        0 - 'url' is shorter than ``large_key_size``
        1 - 'url' is too long
        2 - 'domain' is shorter than ``large_key_size``
        3 - 'domain' is too long

    A missing or ``None`` field is treated as an empty string.

    :param large_key_size: exclusive upper bound on the allowed field length.
    :return: mapper function taking a record (dict-like) and yielding two
        dicts with '@table_index' set.
    """
    def large_keys_filter(rec):
        url_ok = len(rec.get('url') or '') < large_key_size
        domain_ok = len(rec.get('domain') or '') < large_key_size

        url_index = 0 if url_ok else 1
        domain_index = 2 if domain_ok else 3

        for table_index in (url_index, domain_index):
            # Emit an independent copy per output table: yielding the same
            # dict twice and re-tagging it in between makes both rows alias
            # one object, so a consumer that buffers rows would see the last
            # '@table_index' on both of them.
            routed = dict(rec)
            routed['@table_index'] = table_index
            yield routed

    return large_keys_filter


class WebvisorDatesStats(yt_luigi.BaseYtTask):
    """Luigi task computing per-domain record counts for webvisor date data.

    The day tables found under ``config.WEBVISOR_DATE_FOLDER_PROCESSED`` are
    merged into a temporary table, reduced by (domain, is_birth_date) with
    ``count_date_stats`` and written to two tables,
    'good_age_per_domain' and 'bad_age_per_domain', sorted by 'count'.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Map the logical input name to its configured folder."""
        return {'webvisor_date_processed': config.WEBVISOR_DATE_FOLDER_PROCESSED}

    def output_folders(self):
        """Map the logical output name to the per-date stats folder."""
        stats_dir = os.path.join(config.WEBVISOR_DATE_FOLDER_PROCESSED, 'stats', self.date)
        return {'webvisor_date_stats': stats_dir}

    def requires(self):
        """This task depends on the webvisor import for the same date."""
        return [ImportWebvisorFromStbxTask(date=self.date)]

    def calc_stats(self, webvisor_table):
        """Sort ``webvisor_table`` in place and reduce it into the stat tables.

        :param webvisor_table: path of the table to aggregate; it is sorted
            by (domain, is_birth_date) before the reduce.
        """
        stats_dir = self.out_f('webvisor_date_stats')
        mr.mkdir(stats_dir)

        # Order matters: count_date_stats writes table 0 for birth-date
        # groups and table 1 for the rest.
        result_tables = [os.path.join(stats_dir, name)
                         for name in ('good_age_per_domain', 'bad_age_per_domain')]

        group_columns = ('domain', 'is_birth_date')
        yt.run_sort(webvisor_table, sort_by=list(group_columns))
        yt.run_reduce(count_date_stats,
                      webvisor_table,
                      result_tables,
                      reduce_by=list(group_columns))

        mr.sort_all(result_tables, sort_by='count')

    def run(self):
        """Merge the day tables, compute the stats, drop the merged table."""
        source_dir = self.in_f('webvisor_date_processed')
        day_tables = mr.get_date_tables(source_dir, None, int(config.STORE_DAYS))

        stats_dir = self.out_f('webvisor_date_stats')
        mr.mkdir(stats_dir)

        merged_table = os.path.join(stats_dir, 'webvisor_date_processed')
        yt.run_merge(day_tables, merged_table, spec={'combine_chunks': True})
        self.calc_stats(merged_table)

        # Remove the temporary merged table. Original author's note: if this
        # folder is empty this tells Luigi that ImportWebvisorFromStbxTask
        # could be run again.
        mr.drop(merged_table)

    def output(self):
        """Yield one YT target per produced stat table."""
        for table_name in ('good_age_per_domain', 'bad_age_per_domain'):
            table_path = os.path.join(self.out_f('webvisor_date_stats'), table_name)
            yield yt_luigi.YtTarget(table_path)


class WebvisorDomainsStats(yt_luigi.BaseYtTask):
    """Luigi task computing yuid counts per domain and per url.

    The day tables found under ``config.WEBVISOR_LOGIN_FOLDER_PROCESSED`` are
    merged, split by key length with ``mk_large_keys_filter`` and the
    records with acceptable keys are aggregated separately for 'domain'
    and 'url'.
    """
    date = luigi.Parameter()

    def input_folders(self):
        """Map the logical input name to its configured folder."""
        return {'webvisor_processed': config.WEBVISOR_LOGIN_FOLDER_PROCESSED}

    def output_folders(self):
        """Map the logical output name to the per-date stats folder."""
        stats_dir = os.path.join(config.WEBVISOR_LOGIN_FOLDER_PROCESSED, 'stats', self.date)
        return {'webvisor_stats': stats_dir}

    def requires(self):
        """This task depends on the webvisor import for the same date."""
        return [ImportWebvisorFromStbxTask(date=self.date)]

    def calc_stats(self, webvisor_table, domain_or_url):
        """Build the six '<group>_yuids_per_<domain_or_url>' tables.

        :param webvisor_table: path of the input table; it is sorted in
            place before the reduce.
        :param domain_or_url: name of the grouping column, also used as the
            output sub-folder and table-name suffix.
        """
        group_dir = os.path.join(self.out_f('webvisor_stats'), domain_or_url)
        mr.mkdir(group_dir)

        # Order matters: it must match the '@table_index' values emitted by
        # prepare_webvisor_for_stat.
        yuid_tables = [os.path.join(group_dir, name)
                       for name in ('email_login_yuids',
                                    'email_multilogin_yuids',
                                    'email_no_login_yuids',
                                    'phone_login_yuids',
                                    'phone_multilogin_yuids',
                                    'phone_no_login_yuids')]
        counted_tables = [path + '_per_' + domain_or_url for path in yuid_tables]

        # Split yuids into the groups of interest.
        key_columns = ('yuid', domain_or_url, 'is_login', 'id_type')
        yt.run_sort(webvisor_table, sort_by=list(key_columns))
        reducer = partial(prepare_webvisor_for_stat, domain_or_url=domain_or_url)
        yt.run_reduce(reducer,
                      webvisor_table,
                      yuid_tables,
                      reduce_by=list(key_columns))

        # Count yuids per domain/url. The operations are handed to wait_all
        # as a lazy generator, so wait_all decides when each one is started.
        pending = (mr.count_field_recs(src,
                                       dst,
                                       columns=[domain_or_url],
                                       expect_large_keys=False, sync=False)
                   for src, dst in zip(yuid_tables, counted_tables))
        utils.wait_all(pending)
        mr.sort_all(counted_tables, sort_by='count')

        # The intermediate per-yuid tables are no longer needed.
        for intermediate in yuid_tables:
            mr.drop(intermediate)

    def run(self):
        """Merge day tables, filter oversized keys and compute both stat sets."""
        source_dir = self.in_f('webvisor_processed')
        day_tables = mr.get_date_tables(source_dir, None, int(config.STORE_DAYS))

        stats_dir = self.out_f('webvisor_stats')
        mr.mkdir(stats_dir)

        merged = os.path.join(stats_dir, 'webvisor_processed')
        urls_ok = merged + '_url_ok'
        urls_bad = merged + '_url_bad'
        domains_ok = merged + '_domain_ok'
        domains_bad = merged + '_domain_bad'

        yt.run_merge(day_tables, merged, spec={'combine_chunks': True})

        # Output order must match the table indices used by the filter:
        # 0 url ok, 1 url too long, 2 domain ok, 3 domain too long.
        yt.run_map(mk_large_keys_filter(config.YT_KEY_SIZE_LIMIT),
                   merged,
                   [urls_ok, urls_bad, domains_ok, domains_bad])

        self.calc_stats(domains_ok, 'domain')  # stats per domain
        self.calc_stats(urls_ok, 'url')  # stats per url

        # Remove all temporary tables. Original author's note: if this
        # folder is empty this tells Luigi that ImportWebvisorFromStbxTask
        # could be run again.
        for temporary in (merged, urls_ok, urls_bad, domains_ok, domains_bad):
            mr.drop(temporary)

    def output(self):
        """Yield one YT target per (grouping, stat type) result table."""
        stat_names = ('email_login_yuids',
                      'email_multilogin_yuids',
                      'email_no_login_yuids',
                      'phone_login_yuids',
                      'phone_multilogin_yuids',
                      'phone_no_login_yuids')
        for grouping in ('url', 'domain'):
            for stat_name in stat_names:
                relative = os.path.join(grouping, stat_name + '_per_' + grouping)
                yield yt_luigi.YtTarget(os.path.join(self.out_f('webvisor_stats'), relative))

