#!/usr/bin/python
# -*- coding: utf-8 -*-
import luigi
import yt.wrapper as yt

from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from rtcconf import config
from utils import mr_utils as mr
from utils import utils

# Upper bound on the number of distinct yuids kept per device key; used as the
# loop cut-off in reduce_daily_devid_yuid and reduce_devid_yuid_month.
MAX_YUIDS_PER_DEVICE = 3

# Input columns that map_statbox_is scans for hashed identifiers. A hit is
# emitted as a row of type 'hash' whose 'hash_type' is the column name.
hash_columns = [
    'idfa_md5',
    'idfa_sha1',
    'android_id_md5',
    'android_id_sha1',
    'google_ad_id_md5',
    'google_ad_id_sha1'
]


def map_statbox_is(rec):
    """Mapper: split one input log record into id -> cryptaid / device -> yuid rows.

    Rows with ``@table_index`` 0 carry ``key`` -> ``cryptaid`` links, tagged by
    ``type``: 'devid' (key is the normalised device id), 'yuid' (key is the
    yuid) or 'hash' (key is the value of one of ``hash_columns``, with the
    column name in ``hash_type``). Rows with ``@table_index`` 1 carry
    device id -> yuid links.

    ``unixtime`` is emitted negated, so an ascending sort on
    (key, unixtime) puts the most recent row of every key first.

    Records whose ``unixtime`` is missing, null or not an integer are dropped.
    """
    cryptaid = rec.get('cryptaid', '')
    yuid = rec.get('yuid', '')
    raw_time = rec.get('unixtime', '')
    deviceid = utils.norm_id(rec.get('deviceid', ''))

    try:
        ts = int(raw_time)
    except (TypeError, ValueError):
        # ValueError: empty / non-numeric string. TypeError: the column is
        # present but null (None), which int() rejects with a different error.
        return

    # Device ids starting with 'de00000000000' are skipped
    # (presumably placeholder ids -- TODO confirm).
    if deviceid and not deviceid.startswith('de00000000000') and raw_time:
        if cryptaid:
            yield {'@table_index': 0, 'key': deviceid, 'cryptaid': cryptaid, 'type': 'devid', 'unixtime': -ts}
        if yuid:
            yield {'@table_index': 1, 'key': deviceid, 'yuid': yuid, 'unixtime': -ts}

    if cryptaid and yuid and raw_time:
        yield {'@table_index': 0, 'key': yuid, 'cryptaid': cryptaid, 'type': 'yuid', 'unixtime': -ts}

    if cryptaid:
        for hash_column in hash_columns:
            hash_value = rec.get(hash_column)
            # Like deviceid and yuid above, an empty or null hash is not a
            # usable key: all such rows would collapse into a single group.
            if hash_value:
                yield {'@table_index': 0, 'key': hash_value, 'cryptaid': cryptaid, 'type': 'hash',
                       'hash_type': hash_column, 'unixtime': -ts}


def reduce_daily(_, recs):
    """Reducer: keep only the first row of each key group.

    The first row is tagged with ``@table_index`` 0 (in place) and emitted;
    the rest of the group is never read. An empty group emits nothing.
    """
    nothing = object()
    head = next(iter(recs), nothing)
    if head is not nothing:
        head['@table_index'] = 0
        yield head


def reduce_cid_month(key, recs):
    """Reducer: emit the cryptaid mapping for one key from the first row of its group.

    Only the first row of ``recs`` is read; its ``type`` decides the output:

    * 'devid' -> a key/subkey/value row to the default output and the same
      row to ``@table_index`` 1;
    * 'yuid'  -> a key/subkey/value row to the default output and the same
      row to ``@table_index`` 2;
    * 'hash'  -> a hash/hash_type/cryptaid row to ``@table_index`` 3.

    Any other type, or an empty group, emits nothing.
    """
    first_rec = next(iter(recs), None)
    if first_rec is None:
        # Empty group: nothing to map (previously crashed on None['type']).
        return

    key = key['key']
    rec_type = first_rec['type']
    cryptaid = first_rec['cryptaid']

    if rec_type in ('yuid', 'devid'):
        # Combined device+yuid table (no explicit index -> default output).
        yield {'key': key, 'subkey': 'cryptaid', 'value': cryptaid}
        # Plus the type-specific table.
        yield {'key': key, 'subkey': 'cryptaid', 'value': cryptaid,
               '@table_index': 1 if rec_type == 'devid' else 2}
    elif rec_type == 'hash':
        yield {'hash': key, 'hash_type': first_rec['hash_type'], 'cryptaid': cryptaid, '@table_index': 3}


def reduce_daily_devid_yuid(_, recs):
    """Reducer: keep the first row of each distinct yuid, up to the per-device cap.

    Rows are read in group order; reading stops as soon as
    MAX_YUIDS_PER_DEVICE distinct yuids have been collected. The kept rows
    are then tagged with ``@table_index`` 0 (in place) and emitted in the
    order they were first seen.
    """
    seen = set()
    kept = []

    for row in recs:
        yuid = row['yuid']
        if yuid in seen:
            continue
        seen.add(yuid)
        kept.append(row)
        # The count only changes when a new yuid is added, so checking the
        # cap here is enough.
        if len(seen) >= MAX_YUIDS_PER_DEVICE:
            break

    for row in kept:
        row['@table_index'] = 0
        yield row


def reduce_devid_yuid_month(key, recs):
    """Reducer: emit up to MAX_YUIDS_PER_DEVICE distinct yuids for one device key.

    Rows are read in group order until the cap on distinct yuids is reached;
    each collected yuid becomes one key/subkey/value row with subkey 'yuid'.
    Rows are emitted in set iteration order.
    """
    device_key = key['key']
    distinct_yuids = set()

    for row in recs:
        distinct_yuids.add(row['yuid'])
        if len(distinct_yuids) >= MAX_YUIDS_PER_DEVICE:
            break

    for yuid in distinct_yuids:
        yield {'key': device_key, 'subkey': 'yuid', 'value': yuid}


def _day_table(out_folder, day, name):
    """Return the path of table ``name`` inside the ``day`` sub-folder of ``out_folder``."""
    return out_folder + day + '/' + name


def _sort_day_tables(out_folder, days, name):
    """Sort the per-day table ``name`` by (key, unixtime) for every day where it exists.

    All sorts are started with ``sync=False`` and handed to ``utils.wait_all``.
    """
    tables = [_day_table(out_folder, day, name) for day in days]
    utils.wait_all([yt.run_sort(table, sort_by=['key', 'unixtime'], sync=False)
                    for table in tables if yt.exists(table)])


def _reduce_day_tables(reducer, out_folder, days, src_name, dst_name):
    """Run ``reducer`` from per-day table ``src_name`` into ``dst_name`` for every day where the source exists.

    All reduces are started with ``sync=False`` and handed to ``utils.wait_all``.
    """
    utils.wait_all([yt.run_reduce(reducer,
                                  _day_table(out_folder, day, src_name),
                                  _day_table(out_folder, day, dst_name),
                                  sort_by=['key', 'unixtime'], reduce_by='key', sync=False)
                    for day in days if yt.exists(_day_table(out_folder, day, src_name))])


def run(date, out_folder):
    """Build the per-day mapping tables that are missing, then rebuild the aggregate mappings.

    Steps:

    1. Find the days (within ``config.STORE_DAYS`` before ``date``) that are
       present in ``config.IS_FOLDER`` but have no ``mapping_deviceid`` table
       under ``out_folder`` yet.
    2. For each such day run ``map_statbox_is`` into ``mapping_cryptaid_tmp``
       and ``mapping_deviceid_tmp``, sort them, reduce them into
       ``mapping_cryptaid`` / ``mapping_deviceid`` and sort the results.
       The ``*_tmp`` tables are left in place; ``check_missing_yuids`` in
       this file reads them.
    3. Reduce all per-day tables into the aggregate tables in ``out_folder``
       and stamp four of them with ``date`` as their generate date.

    :param date: date string of the run; also the upper bound of the day window.
    :param out_folder: folder prefix (concatenated directly with day and table
        names) that holds the per-day folders and the aggregate tables.
    """
    store_days = int(config.STORE_DAYS)

    dates_is = set(mr.list_dates_before(config.IS_FOLDER, date, store_days))
    # Finished days are looked up in out_folder: that is where the existence
    # check below looks and where this function writes. The original listed
    # config.IS_OUTPUT_FOLDER here, which only agrees when the caller passes
    # that very folder.
    dates_done = set(dt for dt in mr.list_dates_before(out_folder, date, store_days)
                     if yt.exists(_day_table(out_folder, dt, 'mapping_deviceid')))
    # Newest day first.
    dates_todo = sorted(dates_is - dates_done, reverse=True)

    for date_todo in dates_todo:
        mr.mkdir(out_folder + date_todo)
        yt.run_map(map_statbox_is, config.IS_FOLDER + date_todo,
                   [_day_table(out_folder, date_todo, 'mapping_cryptaid_tmp'),
                    _day_table(out_folder, date_todo, 'mapping_deviceid_tmp')])

    if dates_todo:
        _sort_day_tables(out_folder, dates_todo, 'mapping_cryptaid_tmp')
        _sort_day_tables(out_folder, dates_todo, 'mapping_deviceid_tmp')

        _reduce_day_tables(reduce_daily, out_folder, dates_todo,
                           'mapping_cryptaid_tmp', 'mapping_cryptaid')
        _reduce_day_tables(reduce_daily_devid_yuid, out_folder, dates_todo,
                           'mapping_deviceid_tmp', 'mapping_deviceid')

        _sort_day_tables(out_folder, dates_todo, 'mapping_cryptaid')
        _sort_day_tables(out_folder, dates_todo, 'mapping_deviceid')

    # Output order matters: reduce_cid_month addresses these tables by
    # @table_index (default/0, 1, 2, 3).
    yt.run_reduce(reduce_cid_month,
                  mr.get_date_tables(out_folder, 'mapping_cryptaid', store_days),
                  [out_folder + 'mapping_deviceid_yuid_cid', out_folder + 'mapping_deviceid_cid',
                   out_folder + 'mapping_yuid_cid', out_folder + 'mapping_hash_cid'],
                  sort_by=['key', 'unixtime'], reduce_by='key')

    yt.run_reduce(reduce_devid_yuid_month,
                  mr.get_date_tables(out_folder, 'mapping_deviceid', store_days),
                  out_folder + 'mapping_deviceid_yuid',
                  sort_by=['key', 'unixtime'], reduce_by='key')

    yt.run_sort(out_folder + 'mapping_yuid_cid', sort_by='key')  # to use in vertices reassign

    for table_name in ('mapping_deviceid_yuid_cid', 'mapping_deviceid_cid',
                       'mapping_yuid_cid', 'mapping_deviceid_yuid'):
        mr.set_generate_date(out_folder + table_name, date)


class GraphCidTask(base_luigi_task.BaseTask):
    """Luigi task that runs the module-level ``run`` pipeline for one date."""

    date = luigi.Parameter()

    def requires(self):
        """Depend on the input log table for ``self.date``."""
        day_log = config.IS_FOLDER + self.date
        return yt_luigi.ExternalInput(day_log)

    def run(self):
        """Run the pipeline, writing under ``config.IS_OUTPUT_FOLDER``."""
        run(self.date, config.IS_OUTPUT_FOLDER)

    def output(self):
        """Return date-stamped targets for the four aggregate mapping tables."""
        root = config.IS_OUTPUT_FOLDER
        table_names = (
            'mapping_deviceid_yuid_cid',
            'mapping_deviceid_cid',
            'mapping_yuid_cid',
            'mapping_deviceid_yuid',
        )
        return [yt_luigi.YtDateTarget(root + name, self.date) for name in table_names]


def check_missing_yuids(date):
    """Compare the yuids of the day's input log with those in the day's ``*_tmp`` mapping tables.

    Builds de-duplicated yuid lists from the log, from ``mapping_cryptaid_tmp``
    (rows of type 'yuid') and from ``mapping_deviceid_tmp``, joins them and
    writes four tables in the day's output folder: ``yuids_only_log``,
    ``yuids_only_tbl``, ``yuids_wtf`` and ``yuids_both_tbl_log``.

    :param date: date string naming the day to check.
    :raises Exception: if ``yuids_only_log`` contains at least one row.
    """
    def map_uniq_yuid_is(rec):
        # Emit every non-empty yuid; the reducer built below overwrites 'src'.
        yuid = rec.get('yuid')
        if yuid:
            yield {'yuid': yuid, 'src': 'log'}

    def mk_uniq_yuid_reducer(src):
        # Build a reducer that emits one row per yuid, labelled with `src`.
        def reduce_uniq_yuid_is(group, rows):
            yield {'yuid': group['yuid'], 'src': src}
        return reduce_uniq_yuid_is

    def reduce_join_things(group, rows):
        # Route the yuid by which sources it was seen in:
        #   0 = log only, 1 = tables only, 2 = neither, 3 = both.
        sources = set(row['src'] for row in rows)
        seen_in_log = 'log' in sources
        seen_in_tbl = 'tbl' in sources or 'tbl-dev' in sources
        if seen_in_log:
            table_index = 3 if seen_in_tbl else 0
        else:
            table_index = 1 if seen_in_tbl else 2
        yield {'yuid': group['yuid'], '@table_index': table_index}

    def map_mapping_cryptaid_tmp(rec):
        # For rows of type 'yuid' the key column holds the yuid itself.
        if rec['type'] == 'yuid':
            yield {'yuid': rec['key']}

    def red_key(group, rows):
        # Read the whole group, then emit a single 'tbl' row for the yuid.
        for _ in rows:
            pass
        yield {'yuid': group['yuid'], 'src': 'tbl'}

    day_out = config.IS_OUTPUT_FOLDER + date

    log = config.IS_FOLDER + date
    tbl_mapped = day_out + '/mapping_cryptaid_tmp'
    tbl_mapped_devid = day_out + '/mapping_deviceid_tmp'
    log_yuids = day_out + '/log_yuids'
    tbl_yuids = day_out + '/tbl_yuids'
    dev_yuids = day_out + '/tbl_dev_yuids'
    all_tbl_yuids = day_out + '/all_tbl_yuids'
    combined_name = day_out + '/yuids_'

    # One de-duplicated yuid list per source.
    yt.run_map_reduce(map_uniq_yuid_is, mk_uniq_yuid_reducer('log'),
                      log, log_yuids, sort_by='yuid', reduce_by='yuid')
    yt.run_map_reduce(map_mapping_cryptaid_tmp, mk_uniq_yuid_reducer('tbl'),
                      tbl_mapped, tbl_yuids, sort_by='yuid', reduce_by='yuid')
    yt.run_map_reduce(map_uniq_yuid_is, mk_uniq_yuid_reducer('tbl-dev'),
                      tbl_mapped_devid, dev_yuids, sort_by='yuid', reduce_by='yuid')

    # Merge both table-side lists and collapse them to one row per yuid (in place).
    yt.run_sort([tbl_yuids, dev_yuids], all_tbl_yuids, sort_by='yuid')
    yt.run_reduce(red_key, all_tbl_yuids, all_tbl_yuids, reduce_by='yuid')

    # Output order must match the @table_index values used in reduce_join_things.
    joined_outputs = [combined_name + 'only_log',
                      combined_name + 'only_tbl',
                      combined_name + 'wtf',
                      combined_name + 'both_tbl_log']
    yt.run_map_reduce(None, reduce_join_things,
                      [log_yuids, all_tbl_yuids],
                      joined_outputs,
                      sort_by='yuid',
                      reduce_by='yuid')

    yt.remove(log_yuids)

    missing_count = yt.row_count(combined_name + 'only_log')
    if missing_count > 0:
        raise Exception('There are some missing yuids in IS log!')


class LostIsYuidsMonitoringTask(base_luigi_task.BaseTask):
    """Luigi task that runs ``check_missing_yuids`` for one date and reports failures to monrun."""

    date = luigi.Parameter()

    def requires(self):
        """Depend on the mapping build for the same date."""
        return GraphCidTask(self.date)

    def run(self):
        """Run the check; any exception is passed to ``utils.monrun_luigi_error`` instead of propagating."""
        try:
            check_missing_yuids(self.date)
        except Exception as error:
            utils.monrun_luigi_error(self, error)

    def output(self):
        """Return the day's ``yuids_only_log`` table as target; an empty table is accepted."""
        return yt_luigi.YtTarget(config.IS_OUTPUT_FOLDER + self.date + '/yuids_only_log',
                                 allow_empty=True)
