import luigi
import yt.wrapper as yt

from lib.luigi import base_luigi_task
from utils import mr_utils as mr
from utils import utils
from lib.luigi import yt_luigi
from matching.device_matching.fuzzy_heuristic.match_by_ip_day import DeviceYuidsFuzzyIpMatchingDayTask, get_table_name, device_yuid_fuzzy_pairs
from matching.device_matching.fuzzy_heuristic.paths import yuid_device_count, device_yuid
from rtcconf import config


def reduce_yuid_device_count(key, recs):
    """Collapse all records of one ``<devid>_<yuid>`` key into a single row.

    The reduce key is split on ``_``; the first part is used as the device id
    and the second as the yuid. For the group the reducer computes:

    * ``ipcount`` - how many times ``subkey`` changes between consecutive
      records (the number of distinct subkeys when the input is sorted by
      subkey; presumably the subkey is an IP - confirm against the producer);
    * ``hit`` - the sum of the integer ``hit`` fields;
    * ``user_agent`` / ``wapprofile`` - the first non-empty value seen.

    Yields exactly one record with ``key=yuid``, ``subkey=devid`` and a
    tab-separated ``value``; ``wapprofile`` is appended only when non-empty.
    """
    key_parts = key['key'].split('_')
    devid, yuid = key_parts[0], key_parts[1]

    distinct_subkeys = 0
    total_hits = 0
    last_subkey = ''
    user_agent = ''
    wapprofile = ''

    for rec in recs:
        packed = rec['value']
        # Keep the first non-empty value of each descriptive field.
        user_agent = user_agent or mr.get_field_value('user_agent', packed)
        wapprofile = wapprofile or mr.get_field_value('wapprofile', packed)

        current_subkey = rec['subkey']
        if current_subkey != last_subkey:
            # A new subkey started; count it once.
            distinct_subkeys += 1
            last_subkey = current_subkey

        total_hits += int(mr.get_field_value('hit', packed))

    fields = [
        'ipcount=' + str(distinct_subkeys),
        'hit=' + str(total_hits),
        'user_agent=' + user_agent,
    ]
    if wapprofile:
        fields.append('wapprofile=' + wapprofile)

    yield {'key': yuid, 'subkey': devid, 'value': '\t'.join(fields)}


def reduce_device_yuid(key, recs):
    """Choose at most one device for the yuid in ``key['key']``.

    Every input record describes one candidate device (``rec['subkey']``)
    with ``ipcount``, ``hit``, ``user_agent`` and optionally ``wapprofile``
    packed into ``rec['value']``.

    Selection rules:

    * exactly one candidate - accepted when its ``ipcount`` is greater than 1
      or its ``hit`` reaches ``config.HIT_THRESHOLD``;
    * several candidates - the one with the largest ``ipcount`` (first one
      wins on ties) is accepted when the ratio of its ``ipcount`` to the
      second-largest ``ipcount`` is at least ``config.MAX_PREV_RATIO``.

    Yields zero or one record with ``key`` = chosen device, ``subkey`` = the
    reduce key, and a tab-separated ``value`` ending with ``perfect=0``.

    Compared with the previous implementation the runner-up ``ipcount`` is
    now tracked independently of record order, and empty input or zero
    ``ipcount`` values no longer raise.
    """
    max_ipcount, prev_max_ipcount = 0, 0
    maxrec = None
    device_candidates = 0
    user_agent, wapprofile = '', ''

    for rec in recs:
        if not user_agent:
            user_agent = mr.get_field_value('user_agent', rec['value'])
            wapprofile = mr.get_field_value('wapprofile', rec['value'])
        ipcount = int(mr.get_field_value('ipcount', rec['value']))

        if maxrec is None or ipcount > max_ipcount:
            # New leader: the old leader becomes the runner-up.
            prev_max_ipcount = max_ipcount
            max_ipcount = ipcount
            maxrec = rec
        elif ipcount > prev_max_ipcount:
            # Not a leader, but better than the current runner-up
            # (a tie with the leader lands here and makes the ratio 1).
            prev_max_ipcount = ipcount
        device_candidates += 1

    value = ''
    if device_candidates == 1:
        hit = int(mr.get_field_value('hit', maxrec['value']))
        if max_ipcount > 1 or hit >= config.HIT_THRESHOLD:
            value = 'candidates=1\tipcount=' + str(max_ipcount) + '\thit=' + str(hit) + '\tua=' + user_agent
    elif device_candidates > 1:
        if prev_max_ipcount > 0:
            # Same `/` operator as before; note it is integer division for
            # int operands when run under Python 2.
            dominant = max_ipcount / prev_max_ipcount >= config.MAX_PREV_RATIO
        else:
            # Runner-up has no ipcount at all: the ratio is unbounded, so the
            # leader wins as long as it has any ipcount itself.
            dominant = max_ipcount > 0
        if dominant:
            # The hit field of the winner is emitted as the raw string.
            hit = mr.get_field_value('hit', maxrec['value'])
            value = 'candidates=' + str(device_candidates) + '\tipcount=' + str(max_ipcount) \
                    + '\tprev_ipcount=' + str(prev_max_ipcount) + '\thit=' + str(hit) + '\tua=' + user_agent
    if value:
        if wapprofile:
            value += '\twp=' + wapprofile
        yield dict(key=maxrec['subkey'], subkey=key['key'], value=value + '\tperfect=0')


class DeviceYuidsMergeFuzzyDictMonthTask(base_luigi_task.BaseTask):
    """Merge the daily fuzzy device/yuid pair tables into one table for ``date``.

    Runs two chained map-reduce operations (both without a mapper) and drops
    the intermediate table afterwards.
    """
    date = luigi.Parameter()

    def requires(self):
        """Depend on one daily matching task per date in the storage window."""
        days = utils.get_dates_before(self.date, int(config.STORE_DAYS))
        return [
            DeviceYuidsFuzzyIpMatchingDayTask(date=day, run_date=self.date)
            for day in days
        ]

    def run(self):
        """Aggregate pair tables per (device, yuid), then pick a device per yuid."""
        run_date = self.date

        # Generate intersection counts for all available days
        daily_pair_tables = mr.get_date_tables(
            config.INDEVICE_YT_FOLDER,
            'fuzzy/' + device_yuid_fuzzy_pairs,
            int(config.STORE_DAYS),
        )
        # Only tables that actually exist are fed to the first stage.
        existing_pair_tables = [table for table in daily_pair_tables if yt.exists(table)]

        # Stage 1: per device/yuid pair counts -> intermediate table.
        yt.run_map_reduce(
            None,
            reduce_yuid_device_count,
            existing_pair_tables,
            get_table_name(run_date, yuid_device_count),
            reduce_by='key',
            sort_by=['key', 'subkey'],
        )

        # Stage 2: intermediate table -> final device/yuid table.
        yt.run_map_reduce(
            None,
            reduce_device_yuid,
            get_table_name(run_date, yuid_device_count),
            [get_table_name(run_date, device_yuid)],
            reduce_by='key',
            sort_by=['key', 'subkey'],
        )

        # The intermediate table is no longer needed once stage 2 is done.
        mr.drop(get_table_name(run_date, yuid_device_count))

    def output(self):
        """Return the YT target of the final device/yuid table for ``date``."""
        return yt_luigi.YtTarget(get_table_name(self.date, device_yuid))
