# -*- coding: utf-8 -*-
import os
import subprocess
import operator

from copy import copy
from collections import defaultdict
from functools import partial

import luigi
import yt.wrapper as yt

from lib.luigi import base_luigi_task, yt_luigi

import decrypt_watchlog_browserinfo as dwb

from utils import mr_utils as mr, uat_utils, utils

from data_imports.day_aggregate import reduce_device_log_events_day

from rtcconf import config
from v2.soup import soup_config
from v2.soup.soup_tables import SoupDailyLogTable

from utils.yql_utils import run_yql


def reduce_by_ip_hist(key, recs):
    """Count the records of one (yuid, ip) group per 300-unit ``ts`` bucket.

    Reducer body: ``key`` provides the group's ``yuid`` and ``ip``; ``recs``
    iterates the group's records, each of which must carry a numeric ``ts``.

    Groups whose yuid is the string '0' are dropped without output.
    Otherwise exactly one row is yielded with the keys ``yuid``, ``ip`` and
    ``hour_hist`` (bucket id -> number of records in that bucket).
    """
    yuid = key['yuid']
    if yuid == '0':
        return
    ip = key['ip']

    hour_hist = defaultdict(int)
    for rec in recs:
        # Explicit floor division: plain `/` yields float bucket ids under
        # true division (Python 3 or `from __future__ import division`).
        # presumably ts is in seconds, making each bucket 5 minutes - confirm
        bucket = rec['ts'] // 300
        hour_hist[bucket] += 1

    yield {'yuid': yuid, 'ip': ip, 'hour_hist': utils.default_to_regular(hour_hist)}


class AppMetricaSocketsMapper(object):
    """Mapper that decrypts a record and routes the results to output tables.

    For every record produced by the decryptor two rows are emitted: the
    decrypted record itself (tagged with ``source_type`` and sent to one of
    four consecutive tables starting at ``start_table_index``) and a soup
    record built by ``SoupDailyLogTable.make_rec`` for ``soup_table_index``.

    Table offsets relative to ``start_table_index``:
        +0 android devid, +1 android uuid, +2 ios devid, +3 ios uuid.
    """

    def __init__(self, start_table_index, soup_table_index):
        self.start_table_index = start_table_index
        self.soup_table_index = soup_table_index
        # Built on first call; see __call__.
        self.decryptor = None

    def get_source_type(self, user_agent):
        """Return the iOS source type if the user agent's OSFamily is 'iOS'.

        Any other case, including an empty or missing user agent, is
        treated as Android.
        """
        if user_agent:
            os_family = uat_utils.Ua(user_agent).profile_info.get('OSFamily')
            if os_family == 'iOS':
                return config.ID_SOURCE_TYPE_METRICA_SOCKETS_IOS

        # assume by default
        return config.ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID

    def __call__(self, rec):
        """Yield (decrypted record, soup record) pairs for one input record.

        Raises:
            Exception: if a decrypted record has neither 'devid' nor 'uuid'.
        """
        if self.decryptor is None:
            # NOTE(review): presumably created lazily so the key is read from
            # the job environment rather than stored on the mapper - confirm.
            private_key = os.environ.get('YT_SECURE_VAULT_METRIKA_PRIVATE_KEY', None)
            self.decryptor = dwb.DecryptMapper(private_key)

        for decrypted in self.decryptor(rec):

            source_type = self.get_source_type(rec.get('useragent'))
            decrypted['source_type'] = source_type
            is_android = source_type == config.ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID

            # Pick the id column, the output table offset and the soup edge type.
            if 'devid' in decrypted:
                id_field = 'devid'
                if is_android:
                    offset, edge_type = 0, soup_config.yuid_devid_socket_android
                else:
                    offset, edge_type = 2, soup_config.yuid_devid_socket_ios
            elif 'uuid' in decrypted:
                id_field = 'uuid'
                if is_android:
                    offset, edge_type = 1, soup_config.yuid_uuid_socket_android
                else:
                    offset, edge_type = 3, soup_config.yuid_uuid_socket_ios
            else:
                raise Exception("No uuid or devid found")

            decrypted['@table_index'] = self.start_table_index + offset
            soup_rec = SoupDailyLogTable.make_rec(
                decrypted['yuid'], decrypted[id_field], edge_type,
                decrypted['ts'], table_index=self.soup_table_index)

            yield decrypted
            yield soup_rec


def run_one_day(workdir, dt, devid_raw_f, soup_log_table):
    """Split one day of filtered metrica-socket records and aggregate them.

    Steps, in order:
      1. map ``workdir + 'watch_log_filtered_metrica_sockets'`` with
         ``AppMetricaSocketsMapper`` into the soup log table plus four
         per-platform/per-id tables under ``workdir``;
      2. sort those four tables;
      3. reduce each with ``reduce_device_log_events_day`` into a table of
         the same name under ``devid_raw_f``;
      4. drop the filtered input table.

    Args:
        workdir: folder prefix (with trailing slash) for intermediate tables.
        dt: passed to the reducer as ``dt``.
        devid_raw_f: folder prefix (with trailing slash) for the day tables.
        soup_log_table: destination table for all soup records.
    """
    for folder in (workdir, devid_raw_f):
        mr.mkdir(folder)

    android = config.ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID
    ios = config.ID_SOURCE_TYPE_METRICA_SOCKETS_IOS
    devid = config.ID_TYPE_DEVID
    uuid = config.ID_TYPE_UUID
    yuid = config.ID_TYPE_YUID

    # import log
    # Position in this list is the '@table_index' the mapper writes to:
    # soup records go to #0, socket records to #1..#4.
    map_outputs = [
        (soup_log_table, None),
        (workdir + 'devid_yuid_' + android, [devid, yuid, 'ts']),
        (workdir + 'uuid_yuid_' + android, [uuid, yuid, 'ts']),
        (workdir + 'devid_yuid_' + ios, [devid, yuid, 'ts']),
        (workdir + 'uuid_yuid_' + ios, [uuid, yuid, 'ts']),
    ]

    with open(config.METRICA_RSA_KEY_PATH) as key_file:
        private_key = key_file.read()

    mapper_input = workdir + 'watch_log_filtered_metrica_sockets'
    mapper = AppMetricaSocketsMapper(start_table_index=1, soup_table_index=0)

    # The key is handed to the jobs through the operation's secure vault;
    # copy the shared spec so the module-level one is not modified.
    map_spec = copy(mr.DATA_SIZE_PER_JOB_2GB_SPEC)
    map_spec['secure_vault'] = {'METRIKA_PRIVATE_KEY': private_key}

    yt.run_map(
        mapper, mapper_input,
        [table for table, _ in map_outputs], spec=map_spec)

    # prepare old-style day data
    utils.wait_all(
        yt.run_sort(table, sort_by=columns, sync=False)
        for table, columns in map_outputs if columns)

    day_tables = [(devid, android), (uuid, android), (devid, ios), (uuid, ios)]
    reduce_ops = []
    for id_type, source_type in day_tables:
        table_name = '%s_yuid_%s' % (id_type, source_type)
        reduce_ops.append(yt.run_reduce(
            partial(reduce_device_log_events_day, dt=dt, source_type=source_type),
            workdir + table_name,
            devid_raw_f + table_name,
            sort_by=[id_type, yuid, 'ts'],
            reduce_by=[id_type, yuid], sync=False
        ))

    utils.wait_all(reduce_ops)
    mr.drop(mapper_input)


class ImportWatchLogDayTask(yt_luigi.BaseYtTask):
    """Import one day of bs-watch-log.

    Runs the 'WLParser' YQL script, then ``run_one_day`` for the metrica
    socket tables, then prepares the daily soup tables from the soup log.
    """

    date = luigi.Parameter()
    run_date = luigi.Parameter()

    resources = {'import_watch_log_lock': 1}
    priority = 1

    def __init__(self, *args, **kwargs):
        super(ImportWatchLogDayTask, self).__init__(*args, **kwargs)
        self.soup_log = SoupDailyLogTable(soup_config.LOG_SOURCE_WATCH_LOG, self.date)

    def input_folders(self):
        """Named input folders for this task."""
        return {
            'statbox_watch_log': config.STATBOX_BS_WATCH_FOLDER,
            'dict': config.GRAPH_YT_DICTS_FOLDER,
        }

    def workdir(self):
        """Folder for this day's intermediate raw-link tables."""
        return config.YT_OUTPUT_FOLDER + self.date + '/raw_links/'

    def output_folders(self):
        """Named output folders for this task, all scoped to ``self.date``."""
        graph_day = config.YT_OUTPUT_FOLDER + self.date
        indevice_day = config.INDEVICE_YT_FOLDER + self.date
        return {
            'yuid_raw': graph_day + '/yuid_raw/',
            'devid_raw': indevice_day + '/perfect/devid_raw_day/',
            'ecommerce': graph_day + '/ecommerce/',
        }

    def requires(self):
        """Depend on the day's table in the statbox watch-log folder."""
        day_log = self.in_f('statbox_watch_log') + self.date
        return [yt_luigi.ExternalInput(day_log)]

    def run(self):
        yql_params = dict(date=self.date)
        yql_folders = {
            'GRAPH_YT_OUTPUT_FOLDER': config.YT_OUTPUT_FOLDER,
            'INDEVICE_YT_FOLDER': config.INDEVICE_YT_FOLDER,
            'BSWATCH_LOG_DIR': config.LOG_FOLDERS['bs_watch'],
        }
        run_yql('WLParser', yql_params, yql_folders)

        workdir = self.workdir()
        devid_raw_folder = self.out_f('devid_raw')
        soup_log_table = self.soup_log.create()
        run_one_day(workdir, self.date, devid_raw_folder, soup_log_table)
        self.soup_log.prepare_daily_tables_from_log()

    def output(self):
        """List the targets that mark this task as complete.

        The daily soup tables are included only when ``date`` equals
        ``run_date``.
        """
        yuid_with = self.out_f('yuid_raw') + 'yuid_with_'
        devid_raw_folder = self.out_f('devid_raw')
        ecommerce_folder = self.out_f('ecommerce')

        soup_out_tables = (
            self.soup_log.daily_tables_targets() if self.date == self.run_date else [])

        # not available for all 30 days yet, hence allowed to be empty
        may_be_empty = [
            (config.ID_TYPE_VKCOM, config.ID_SOURCE_TYPE_WATCH_LOG),
            (config.ID_TYPE_OKRU, config.ID_SOURCE_TYPE_WATCH_LOG),
            (config.ID_TYPE_AVITO, config.ID_SOURCE_TYPE_WATCH_LOG),
            (config.ID_TYPE_EMAIL, config.ID_SOURCE_TYPE_WATCH_LOG_MAILRU),
            (config.ID_TYPE_MAC, config.ID_SOURCE_TYPE_VMETRO),
        ]

        targets = [
            yt_luigi.YtTarget(
                yuid_with + config.ID_TYPE_EMAIL + '_' + config.ID_SOURCE_TYPE_PAGE_TITLE),
        ]
        for id_type, source_type in may_be_empty:
            targets.append(yt_luigi.YtTarget(
                yuid_with + id_type + '_' + source_type, allow_empty=True))
        targets.append(
            yt_luigi.YtTarget(devid_raw_folder + 'uuid_yuid_' + config.ID_SOURCE_TYPE_WATCH_LOG))
        targets.append(
            yt_luigi.YtTarget(ecommerce_folder + 'yuid_with_purchase_day'))

        return targets + soup_out_tables


class IpHistTask(base_luigi_task.BaseTask):
    """Build the per-(yuid, ip) ``ts`` histogram table for one day."""

    date = luigi.Parameter()

    def requires(self):
        return ImportWatchLogDayTask(date=self.date, run_date=self.date)

    def run(self):
        """Sort the first input table by (yuid, ip) and reduce it.

        The sort is issued against the input table itself, with no separate
        destination table.
        """
        source_table = self.input()[0].table
        destination_tables = [target.table for target in self.output()]
        key_columns = ('yuid', 'ip')

        yt.run_sort(source_table, sort_by=list(key_columns))
        yt.run_reduce(
            reduce_by_ip_hist, source_table, destination_tables,
            reduce_by=list(key_columns))

    def output(self):
        day_folder = config.YT_OUTPUT_FOLDER + self.date
        hist_table = day_folder + '/raw_links/yuid_ip_ts_hist'
        return [yt_luigi.YtTarget(hist_table)]


if __name__ == '__main__':
    # Ad-hoc entry point: runs a single map over one hard-coded day of
    # bs-watch-log into tables under a hard-coded folder. It does not go
    # through the Luigi tasks defined above.
    #
    # NOTE(review): `map_watch_log` (used in the yt.run_map call below) is not
    # defined or imported anywhere in the visible part of this module, so this
    # block would fail with NameError as written - confirm whether the mapper
    # was removed and this block is stale.
    yt.config.set_proxy(config.MR_SERVER)

    # dt = sys.argv[1]
    # luigi.run(['ImportWatchLogDayTask', '--date', dt, '--workers', '10', '--scheduler-port', '8083'])

    workdir = '//home/crypta/team/artembelov/sockets/map/'
    mr.mkdir(workdir)

    # Destination tables for the map call below, in output-table order.
    output_tables = [
        # workdir + 'yuid_ip_ts',
        workdir + 'yuid_with_' + config.ID_SOURCE_TYPE_PAGE_TITLE,
        workdir + 'yuid_with_' + config.ID_TYPE_EMAIL + '_' + config.ID_SOURCE_TYPE_WATCH_LOG_MAILRU,
        workdir + 'yuid_with_' + config.ID_TYPE_MAC + '_' + config.ID_SOURCE_TYPE_VMETRO,
        workdir + 'yuid_with_' + config.ID_TYPE_VKCOM + '_' + config.ID_SOURCE_TYPE_WATCH_LOG,
        workdir + 'yuid_with_' + config.ID_TYPE_OKRU + '_' + config.ID_SOURCE_TYPE_WATCH_LOG,
        workdir + 'yuid_with_' + config.ID_TYPE_AVITO + '_' + config.ID_SOURCE_TYPE_WATCH_LOG,
        workdir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_WATCH_LOG,
        workdir + 'yuid_purchase_log',
        workdir + 'soup',
        workdir + 'devid_yuid_' + config.ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID,
        workdir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_METRICA_SOCKETS_ANDROID,
        workdir + 'devid_yuid_' + config.ID_SOURCE_TYPE_METRICA_SOCKETS_IOS,
        workdir + 'uuid_yuid_' + config.ID_SOURCE_TYPE_METRICA_SOCKETS_IOS,
    ]

    # Empty string; not referenced by the code below and not passed to the
    # map spec.
    rsakey = ''''''

    yt.run_map(
        partial(map_watch_log),
        '//logs/bs-watch-log/1d/2017-11-12',
        output_tables,
        spec=mr.DATA_SIZE_PER_JOB_2GB_SPEC)
