#!/usr/bin/env python

import luigi
import yt.wrapper as yt

from data_imports.import_logs.graph_import_fp import ImportFPDayTask
from data_imports.import_logs.webvisor.graph_webvisor import ImportWebvisorTask
from features import TYuidFeatures
from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from rtcconf import config


# ----- UTILS -----
def parse_tskv(s, sep='\t', kv_sep='='):
    """Parse a delimiter-separated "key=value" string into a dict.

    The string is split on `sep` (a tab by default); every piece is then
    split on the first occurrence of `kv_sep` into a key and a value.
    A piece that contains no `kv_sep` becomes a key mapped to the empty
    string. When a key occurs more than once, the last occurrence wins.
    """
    parsed = {}
    for token in s.split(sep):
        # partition() splits on the first kv_sep only and yields an empty
        # value when the separator is absent.
        name, _, value = token.partition(kv_sep)
        parsed[name] = value
    return parsed

# make dir, recursive (like 'mkdir -p')
def mkdir_p(d):
    """Create the YT map node `d` together with any missing ancestors.

    The root-like paths '', '/' and '//' are neither checked nor created.
    For any other path that does not exist yet, the parent (everything
    before the last '/') is ensured first, then `d` itself is created.
    """
    if d in ('', '/', '//'):
        return
    if yt.exists(d):
        return
    # Build the chain top-down: the parent must exist before the child.
    parent = d.rsplit('/', 1)[0]
    mkdir_p(parent)
    yt.create('map_node', d, ignore_existing=True)


# ----- YUID FEATURES -----
# !!! not used now. candidate to be removed !!!
# fp -> yuid features for single event (or several events)
# (ip=5.18.184.194 \t yandexuid=... | | version=0 \t history=1448478426:r,1448478627:r)
# --> (key=yuid | hist=0:5,1:13,... )
def fp_to_features(rec):
    """Map one fp record to a yuid feature record.

    `rec['key']` is parsed as a tab-separated key=value string. When it
    carries a 'yandexuid' longer than 5 characters, a TYuidFeatures
    object is filled from the whole record via parse_fp() and a single
    {'key': <yandexuid>, 'features': <serialized features>} record is
    yielded. Otherwise nothing is yielded.
    """
    fields = parse_tskv(rec['key'])
    yuid = fields.get('yandexuid', '')
    if len(yuid) <= 5:
        # Missing or too-short yandexuid: skip the record.
        return
    features = TYuidFeatures()
    features.parse_fp(rec)
    yield {'key': yuid, 'features': features.to_string()}


# ip_yuid_stream -> yuid features for single event
# (1.0.156.55|m|phone|samsung|android|4.1.2 | b | user_agent=... ts=1447857223 yuid=2298698291430398656)
# --> (yuid | b | hist=0:5,1:13,... )
def ip_yuid_stream_to_features(rec):
    """Map one ip_yuid_stream record to a yuid feature record.

    `rec['value']` is parsed as a tab-separated key=value string. When
    both 'yuid' and 'ts' are present, the integer timestamp is added to
    the histogram of a fresh TYuidFeatures object and a single
    {'key': <yuid>, 'features': <serialized features>} record is yielded.
    Records lacking either field produce no output.
    """
    fields = parse_tskv(rec['value'])
    if 'yuid' not in fields or 'ts' not in fields:
        return
    features = TYuidFeatures()
    features.hist.add_ts(int(fields['ts']))
    yield {'key': fields['yuid'], 'features': features.to_string()}


# ----- MERGE FEATURES -----
# (key=yuid | hist=0:5,1:13,... )
# --> (key=yuid | hist=0:5,1:13,...)
#           or
# (key=deviceid | | hist=<histograms for several apps> )
# --> (key=deviceid | | hist=<histograms for several apps> )
class merge_features(object):
    """Reducer that folds all feature records of one key into one record.

    The instance is parameterised with a feature class (the original
    comment names TYuidFeatures or TDevFeatures). That class must be
    callable with no arguments (empty accumulator) and with a serialized
    features string, and its instances must provide merge() and
    to_string().
    """

    def __init__(self, feature_fabric):
        # Factory used both for the accumulator and for parsing each record.
        self.feature_fabric = feature_fabric

    def __call__(self, key, recs):
        """Yield one {'key', 'features'} record merging every rec in `recs`."""
        make_features = self.feature_fabric
        merged = make_features()
        for row in recs:
            parsed = make_features(row['features'])
            merged.merge(parsed)
        yield {'key': key['key'], 'features': merged.to_string()}


# ----- TASK -----
def runYT(date):
    """Build the per-day yuid_features table on YT.

    `date` is concatenated directly into the table paths, so it must be
    a string. Steps: point the YT client at config.MR_SERVER, make sure
    the '<INDEVICE_YT_FOLDER><date>/fuzzy' directory exists, map the
    day's ip_yuid_stream table into per-event features, then sort,
    reduce by 'key' (merging features per key) and sort again.
    """
    yt.config.set_proxy(config.MR_SERVER)

    fuzzy_dir = config.INDEVICE_YT_FOLDER + date + '/fuzzy'
    source_table = config.YT_OUTPUT_FOLDER + date + '/ip_yuid_stream'
    features_table = fuzzy_dir + '/yuid_features'

    mkdir_p(fuzzy_dir)

    # One feature record per event, keyed by yuid.
    yt.run_map(ip_yuid_stream_to_features, source_table, features_table)
    # The reduce below groups by 'key', so the table is sorted by it first.
    yt.run_sort(features_table, sort_by='key')
    # Collapse all records of a yuid into one; the table is rewritten in place.
    yt.run_reduce(
        merge_features(TYuidFeatures),
        features_table,
        features_table,
        reduce_by='key',
    )
    yt.run_sort(features_table, sort_by='key')


class PrepareIndeviceFeaturesDayTask(base_luigi_task.BaseTask):
    """Luigi task that builds the yuid_features table for one day.

    Depends on ImportFPDayTask and ImportWebvisorTask for the same
    `date` / `run_date`, runs runYT(date), and exposes the resulting
    '<INDEVICE_YT_FOLDER><date>/fuzzy/yuid_features' table as its output.
    """

    date = luigi.Parameter()
    run_date = luigi.Parameter()

    def requires(self):
        """Return the upstream import tasks for the same date and run_date."""
        shared = dict(date=self.date, run_date=self.run_date)
        return [ImportFPDayTask(**shared), ImportWebvisorTask(**shared)]

    def run(self):
        """Execute the YT pipeline for this task's date."""
        runYT(self.date)

    def output(self):
        """Return the YT target for the day's yuid_features table."""
        return [
            yt_luigi.YtTarget(
                config.INDEVICE_YT_FOLDER + self.date + '/fuzzy/yuid_features'
            ),
        ]

