import os
import datetime
import subprocess

from collections import defaultdict

import luigi
import yt.wrapper as yt

import app_metrica_dict_schemas
from data_imports.import_logs import app_metrica_day
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from utils.yql_utils import run_yql
from v2 import ids


class AppMetricaDictMergeMonthTask(yt_luigi.BaseYtTask):
    """Luigi task that runs the ``MetricaMonthAggregator`` YQL query.

    The query is run for the window of ``config.STORE_DAYS`` days ending at
    ``date``; afterwards both ``app_metrica_month`` tables in the ids storage
    are stamped with ``date`` as their generate date.
    """

    # Run date as a '%Y-%m-%d' string (run() parses it with that format).
    date = luigi.Parameter()

    def input_folders(self):
        """Return the named input folders of the task."""
        return {'graph': config.YT_OUTPUT_FOLDER}

    def output_folders(self):
        """Return the named output folders of the task."""
        mobile_folder = config.YT_OUTPUT_FOLDER + self.date + '/mobile/'
        return {
            'ids_storage': config.CRYPTA_IDS_STORAGE,
            'mobile': mobile_folder,
        }

    def requires(self):
        """Depend on one daily import task per date returned by ``utils.get_dates_before``."""
        days = utils.get_dates_before(self.date, int(config.STORE_DAYS))
        return [
            app_metrica_day.ImportAppMetrikaDayTask(date=day, run_date=self.date)
            for day in days
        ]

    def run(self):
        """Run the aggregation query and mark its output tables with the run date."""
        run_day = datetime.datetime.strptime(self.date, '%Y-%m-%d')
        window = datetime.timedelta(days=int(config.STORE_DAYS))
        date_start = (run_day - window).strftime('%Y-%m-%d')

        query_params = {'date_start': date_start, 'date_end': self.date}
        query_env = {
            'GRAPH_YT_OUTPUT_FOLDER': config.YT_OUTPUT_FOLDER,
            'CRYPTA_IDS_STORAGE': config.CRYPTA_IDS_STORAGE,
        }
        run_yql('MetricaMonthAggregator', query_params, query_env)

        # TODO: is it necessary to set generate_date to self.date?
        # It seems the YQL query already produces up-to-date tables.
        storage = self.out_f('ids_storage')
        device_id_info_out_table = storage + ids.CRYPTA_DEVICE_ID + '/' + 'app_metrica_month'
        uuid_info_out_table = storage + ids.UUID + '/' + 'app_metrica_month'

        with yt.Transaction():
            for table in (device_id_info_out_table, uuid_info_out_table):
                mr.set_generate_date(table, self.date)

    def output(self):
        """Return date targets for the two ``app_metrica_month`` tables.

        NOTE(review): the folder names are hard-coded here ('device_id', 'uuid')
        while run() builds them from ids.CRYPTA_DEVICE_ID / ids.UUID -- confirm
        that both spell the same paths.
        """
        storage = self.out_f('ids_storage')
        table_paths = [
            storage + 'device_id/app_metrica_month',
            storage + 'uuid/app_metrica_month',
        ]
        return [yt_luigi.YtDateTarget(path, self.date) for path in table_paths]


def split_base_to_no_limit_tables(rec):
    """Fan one base-table record out into two rows for two output tables.

    The first row (``@table_index`` 0) carries uuid / mmetric_devid / devid
    together with ``first_date``, ``date`` (a copy of ``first_date``) and
    ``last_date``.  The second row (``@table_index`` 1) carries the same
    identifiers under the names ``UUID`` / ``DeviceID`` / ``devid`` plus
    ``APIKey`` and ``AppID`` taken from ``api_keys`` and ``app_id``.

    Args:
        rec: mapping holding the ``ids.UUID``, ``ids.MMETRIC_DEVICE_ID`` and
            ``ids.CRYPTA_DEVICE_ID`` keys plus ``first_date``, ``last_date``,
            ``api_keys`` and ``app_id``.

    Yields:
        The two rows described above, in that order.
    """
    uuid = rec[ids.UUID]
    mmetric_devid = rec[ids.MMETRIC_DEVICE_ID]
    devid = rec[ids.CRYPTA_DEVICE_ID]
    first_date = rec['first_date']
    last_date = rec['last_date']

    compact_row = {
        'uuid': uuid,
        'mmetric_devid': mmetric_devid,
        'devid': devid,
        'first_date': first_date,
        'date': first_date,
        'last_date': last_date,
        '@table_index': 0,
    }
    yield compact_row

    # 'api_keys' and 'app_id' are only needed by the second row, so they are
    # read after the first row has been emitted.
    extended_row = {
        'devid': devid,
        'UUID': uuid,
        'DeviceID': mmetric_devid,
        'APIKey': rec['api_keys'],
        'AppID': rec['app_id'],
        'first_date': first_date,
        'last_date': last_date,
        '@table_index': 1,
    }
    yield extended_row


def reduce_with_dates_extended(keys, recs):
    """Merge all records of one device into a single extended dictionary row.

    Two record shapes are accepted in ``recs``:

    * rows carrying ``UUID``, ``AppID`` and ``APIKey`` -- ``APIKey`` is an
      iterable of mappings whose *keys* are the API keys (a missing or
      ``None`` value is treated as empty);
    * rows carrying ``AppID_to_UUID`` -- a mapping from ``"<AppID>_<UUID>"``
      to a list of API keys, i.e. the shape this function itself emits.

    Args:
        keys: mapping with the reduce-key values; ``devid`` and ``DeviceID``
            are copied into the result.
        recs: iterable of record mappings of the group.

    Yields:
        Exactly one dict with ``devid``, ``DeviceID``, ``first_date`` /
        ``last_date`` (min / max over every truthy ``first_date`` and
        ``last_date`` seen, ``None`` when there is none) and
        ``AppID_to_UUID`` mapping each pair to its sorted unique API keys,
        with ``@table_index`` 0.  When the number of pairs exceeds 10000 the
        mapping is replaced by that count and ``@table_index`` is set to 1.
    """
    all_dates = set()
    appid_to_uuid = defaultdict(list)
    for rec in recs:
        all_dates.add(rec.get('first_date'))
        all_dates.add(rec.get('last_date'))
        if 'UUID' in rec:
            appid_uuid = rec['AppID'] + "_" + rec['UUID']
            # Touch the entry even when there are no API keys so that the
            # pair itself is still registered.
            pair_api_keys = appid_to_uuid[appid_uuid]
            for api_key_map in rec.get("APIKey") or ():
                # extend() instead of sum(lists, []): linear, and works with
                # dict views on Python 3 as well as lists on Python 2.
                pair_api_keys.extend(api_key_map.keys())
        elif 'AppID_to_UUID' in rec:
            for appid_uuid, api_keys in rec['AppID_to_UUID'].items():
                appid_to_uuid[appid_uuid].extend(api_keys)

    # Materialise the filtered dates: a lazy filter() object is always truthy
    # and can be consumed only once.
    known_dates = [date for date in all_dates if date]
    first_date = min(known_dates) if known_dates else None
    last_date = max(known_dates) if known_dates else None

    result_rec = {
        '@table_index': 0,
        'first_date': first_date,
        'last_date': last_date,
        'devid': keys['devid'],
        'DeviceID': keys['DeviceID'],
        'AppID_to_UUID': {
            appid_uuid: sorted(set(api_keys))
            for appid_uuid, api_keys in appid_to_uuid.items()
        },
    }

    OOM_LIMIT = 10000
    appid_uuid_pair_count = len(result_rec['AppID_to_UUID'])
    if appid_uuid_pair_count > OOM_LIMIT:
        # there are more than 10k appid-uuid pairs on this device, surely it can't be good:
        # emit only the count and route the row to the second output table
        result_rec['AppID_to_UUID'] = appid_uuid_pair_count
        result_rec['@table_index'] = 1

    yield result_rec


def reduce_with_dates_distinct(_, recs):
    """Collapse a group of records into one row spanning the full date range.

    Args:
        _: reduce-key values (unused).
        recs: iterable of record mappings of the group; ``first_date`` and
            ``last_date`` are read from each when present.

    Yields:
        The last record of the group, updated in place: ``first_date`` and
        ``date`` become the minimum and ``last_date`` the maximum over every
        truthy ``first_date`` / ``last_date`` seen (``None`` when there is
        none), and ``@table_index`` is set to 0.  Nothing is yielded for an
        empty group.
    """
    all_dates = set()
    fin_rec = None
    for rec in recs:
        fin_rec = rec
        all_dates.add(rec.get('first_date'))
        all_dates.add(rec.get('last_date'))

    if fin_rec is None:
        # No records at all: there is no row to update or emit.
        return

    # Materialise the filtered dates: a lazy filter() object is always truthy
    # and can be consumed only once.
    known_dates = [date for date in all_dates if date]
    first_date = min(known_dates) if known_dates else None
    last_date = max(known_dates) if known_dates else None

    fin_rec['@table_index'] = 0
    fin_rec['first_date'] = first_date
    fin_rec['date'] = first_date
    fin_rec['last_date'] = last_date
    yield fin_rec


class UpdateUuidDevidIndeviceAllDict(yt_luigi.BaseYtTask):
    """Luigi task that refreshes two dictionary tables in the 'dict' folder.

    ``dev_uuid_indevice_perfect_no_limit`` and ``uuid_dev_no_limit_extended``
    are rebuilt from the daily ``mobile/uuid_info_yt`` tables merged with the
    previous versions of the dictionaries (when those exist).
    """

    # Run date; also used to build the dated 'mobile' working folder.
    date = luigi.Parameter()

    def input_folders(self):
        """Return the named input folders of the task."""
        return {'graph': config.YT_OUTPUT_FOLDER}

    def output_folders(self):
        """Return the named output folders of the task."""
        mobile_folder = config.YT_OUTPUT_FOLDER + self.date + '/mobile/'
        return {
            'dict': config.GRAPH_YT_DICTS_FOLDER,
            'mobile': mobile_folder,
        }

    def requires(self):
        """Depend on the month aggregation task of the same date."""
        return AppMetricaDictMergeMonthTask(self.date)

    def run(self):
        """Build both dictionaries in the working folder, then publish them.

        Note: ``uuid_dev_no_limit_extended_errors`` is not among the dropped
        tables, and only ``dev_uuid_indevice_perfect_no_limit`` gets the
        '_format' attribute and the generate date.
        """
        dict_dir = self.out_f('dict')
        mobile_dir = self.out_f('mobile')
        mr.mkdir(dict_dir)
        mr.mkdir(mobile_dir)

        # Working tables in the dated 'mobile' folder.
        base_tmp = mobile_dir + 'no_limit_tmp_base_table'
        perfect_tmp = mobile_dir + 'dev_uuid_indevice_perfect_no_limit_tmp'
        extended_tmp = mobile_dir + 'uuid_dev_no_limit_extended_tmp'
        perfect_staging = mobile_dir + 'dev_uuid_indevice_perfect_no_limit'
        extended_staging = mobile_dir + 'uuid_dev_no_limit_extended'
        extended_errors = mobile_dir + 'uuid_dev_no_limit_extended_errors'
        # Published dictionary tables.
        perfect_dict = dict_dir + 'dev_uuid_indevice_perfect_no_limit'
        extended_dict = dict_dir + 'uuid_dev_no_limit_extended'

        # Only the daily tables that actually exist are used as input.
        daily_tables = mr.get_date_tables(self.in_f('graph'), 'mobile/uuid_info_yt', int(config.STORE_DAYS))
        uuid_info_yt_ts = [table for table in daily_tables if yt.exists(table)]

        mr.distinct_by(
            [ids.UUID, ids.MMETRIC_DEVICE_ID, ids.CRYPTA_DEVICE_ID, 'app_id'],
            uuid_info_yt_ts,
            base_tmp,
            additional_fields=[
                # (input_field, output_field, is_list, fourth element);
                # NOTE(review): the fourth element looks like the aggregation
                # applied to list values -- confirm against mr.distinct_by.
                ('api_keys', 'api_keys', False, None),
                ('dates', 'first_date', True, min),
                ('dates', 'last_date', True, max),
            ]
        )

        # second destination table requested by d-sun-d
        yt.run_map(split_base_to_no_limit_tables, base_tmp, [perfect_tmp, extended_tmp])

        # no limit tables: fresh rows plus the previous dictionary, if any
        no_limit_tables = [perfect_tmp]
        if yt.exists(perfect_dict):
            no_limit_tables.append(perfect_dict)
        mr.sort_all(no_limit_tables,
                    sort_by=['uuid', 'devid', 'mmetric_devid', 'first_date'])
        perfect_schema = app_metrica_dict_schemas.dict_table_schemas['dev_uuid_indevice_perfect_no_limit']
        mr.create_table_with_schema(perfect_staging, perfect_schema, True)
        yt.run_reduce(
            reduce_with_dates_distinct,
            no_limit_tables,
            perfect_staging,
            sort_by=['uuid', 'devid', 'mmetric_devid', 'first_date'],
            reduce_by=['uuid', 'devid', 'mmetric_devid'],
        )

        # copypaste for d-sun-d: same flow for the extended dictionary
        extended_tables = [extended_tmp]
        if yt.exists(extended_dict):
            extended_tables.append(extended_dict)
        mr.sort_all(extended_tables,
                    sort_by=['devid', 'DeviceID', 'first_date'])
        extended_schema = app_metrica_dict_schemas.dict_table_schemas['uuid_dev_no_limit_extended']
        mr.create_table_with_schema(extended_staging, extended_schema, True)
        yt.run_reduce(
            reduce_with_dates_extended,
            extended_tables,
            [extended_staging, extended_errors],
            sort_by=['devid', 'DeviceID', 'first_date'],
            reduce_by=['devid', 'DeviceID'],
        )

        # prepare dicts: both sorts are started asynchronously and awaited together
        perfect_sort = yt.run_sort(perfect_staging, perfect_dict, sort_by='uuid', sync=False)
        extended_sort = yt.run_sort(extended_staging, extended_dict, sort_by='devid', sync=False)
        utils.wait_all([perfect_sort, extended_sort])

        # Remove the working tables.
        for leftover in (base_tmp, extended_staging, extended_tmp, perfect_staging, perfect_tmp):
            mr.drop(leftover)

        yt.set_attribute(perfect_dict, '_format', 'yson')
        mr.set_generate_date(perfect_dict, self.date)

    def output(self):
        """Return the date target for ``dev_uuid_indevice_perfect_no_limit``."""
        perfect_dict = self.out_f('dict') + 'dev_uuid_indevice_perfect_no_limit'
        return [yt_luigi.YtDateTarget(perfect_dict, self.date)]


if __name__ == '__main__':
    # Manual entry point: point the YT client at the configured proxy and
    # read/write tabular data as YSON with table indices processed.
    yt.config.set_proxy(config.MR_SERVER)
    yt.config["tabular_data_format"] = yt.YsonFormat(process_table_index=True)

    # Build the month aggregation task for a fixed date.
    luigi.build(
        [AppMetricaDictMergeMonthTask('2017-07-12')],
        workers=3,
        scheduler_port=8083,
    )
