#!/usr/bin/env python

import itertools
import os

import luigi
import yt.wrapper as yt

from features import TYuidFeatures, TDevFeatures
from lib.luigi import base_luigi_task
from lib.luigi import yt_luigi
from prepare_features import PrepareIndeviceFeaturesDayTask, merge_features
from rtcconf import config
from utils import mr_utils as mr
from utils import utils
from v2 import ids


# ----- UTILS -----
def parse_tskv(s, sep='\t', kv_sep='='):
    """Parse a TSKV-style line into a dict.

    `s` is split on `sep`; every piece is then split once, on its first
    `kv_sep`, into a key and a value.  A piece that does not contain
    `kv_sep` becomes a key mapped to the empty string.  When a key occurs
    more than once, the last occurrence wins.
    """
    parsed = {}
    for chunk in s.split(sep):
        # partition() yields (chunk, '', '') when kv_sep is absent, which
        # gives exactly the "key with empty value" behaviour.
        name, _, value = chunk.partition(kv_sep)
        parsed[name] = value
    return parsed


# make dir, recursive (like 'mkdir -p')
def mkdir_p(d):
    """Create the YT map node `d`, creating missing ancestors first.

    The strings '', '/' and '//' are treated as roots and terminate the
    recursion.  A path for which `yt.exists` is true is left untouched.
    """
    if d in ('', '/', '//'):
        return
    if yt.exists(d):
        return
    # Everything before the last '/' is the parent; make sure it exists
    # before creating the node itself.
    parent = d.rsplit('/', 1)[0]
    mkdir_p(parent)
    yt.create('map_node', d, ignore_existing=True)


# ----- FEATURES TO MATRIXNET FORMAT RULES -----
# A count strictly above this threshold is encoded as 1.0, anything else as 0.0.
sign_threshold = 0.1
# Apps whose per-device histograms are appended, in this order, after the
# yuid histogram.
app_list = [
    "com.yandex.browser",
    "ru.yandex.searchplugin",
    "com.avito.android",
    "ru.yandex.yandexmaps",
    "ru.yandex.yandexnavi",
]


def reformat_features(rec_id, yfeat, dfeat):
    """Build the field list for one (device, yuid) candidate.

    The counts from `yfeat.hist.get_count_list()` are followed by the counts
    from `dfeat.hists[app].get_count_list()` for every app in `app_list`.
    Each count is binarised against `sign_threshold` and rendered with
    `str`, so every feature field is either '1.0' or '0.0'.

    Returns a list whose first element is `rec_id` and whose remaining
    elements are the feature strings.

    Raises KeyError if `dfeat.hists` lacks one of the apps in `app_list`.
    """
    # Copy first: extending the list returned by get_count_list() in place
    # could modify the histogram object owned by the caller.
    counts = list(yfeat.hist.get_count_list())
    for app in app_list:
        counts.extend(dfeat.hists[app].get_count_list())

    # List comprehension instead of map(): map() returns an iterator on
    # Python 3 and cannot be concatenated to a list.
    encoded = [str(1. if count > sign_threshold else 0.) for count in counts]
    return [rec_id] + encoded


# ----- COLLECT AND PREPARE FEATURES -----
def prepare_candidates(rec):
    """Mapper: split a '<devid>_<yuid>' pair key into two candidate rows.

    The key is split on its last '_'.  Two rows are emitted, both carrying
    the device id as 'pkey':
      * table 0 - keyed by the yuid;
      * table 1 - keyed by the device id.

    Raises ValueError when rec['key'] contains no '_'.
    """
    device_id, yandex_uid = rec['key'].rsplit('_', 1)
    for table_index, join_key in enumerate((yandex_uid, device_id)):
        yield {'key': join_key, 'pkey': device_id, '@table_index': table_index}


class add_features_to_candidates(object):
    """Reducer that attaches merged features to candidate rows.

    Within one reduce group the leading rows that contain a 'pkey' field
    are treated as candidate rows; the first row without 'pkey' and all
    rows after it are handed to the reducer built by
    `merge_features(feature_fabric)`.  Every row that reducer produces is
    re-emitted as {'pkey': ..., 'id': key['key'], <feature_name>: features}.

    Nothing is emitted when the group has no leading candidate row, or when
    it consists only of candidate rows.

    NOTE(review): when several candidate rows precede the feature rows, only
    the 'pkey' of the last one is used - confirm this is intended.
    """

    def __init__(self, feature_fabric, feature_name):
        self.feature_name = feature_name
        self.reducer = merge_features(feature_fabric)

    def __call__(self, key, rec):
        rows = iter(rec)
        parent_key = None
        try:
            # Consume the leading candidate rows, remembering the latest pkey.
            head = next(rows)
            while 'pkey' in head:
                parent_key = head['pkey']
                head = next(rows)
            if parent_key is None:
                return
            # `head` is the first non-candidate row; put it back in front of
            # the remaining rows before merging.
            remaining = itertools.chain([head], rows)
            for merged in self.reducer(key, remaining):
                yield {
                    'pkey': parent_key,
                    'id': key['key'],
                    self.feature_name: merged['features'],
                }
        except StopIteration:
            # The group ran out of rows before a non-candidate row was seen.
            pass


def collect_features(key, recs):
    """Reducer: join yuid and device features of one group into one row.

    Rows containing 'yfeat' supply the yuid (their 'id') and the yuid
    features; rows containing 'dfeat' supply the device id and the device
    features.  If several rows of a kind are present, the last one wins.
    `key` itself is not used.

    A single row {'key': '<devid>_<yuid>', 'value': <tab-joined features>}
    is emitted to table 0 when both kinds of features were seen and the
    combined key contains no newline, tab or NUL character; otherwise
    nothing is emitted.
    """
    yuid = devid = yfeat = dfeat = None
    for row in recs:
        if 'yfeat' in row:
            yuid = row['id']
            yfeat = TYuidFeatures(row['yfeat'])
        if 'dfeat' in row:
            devid = row['id']
            dfeat = TDevFeatures(row['dfeat'])

    if yfeat is None or dfeat is None:
        return

    pair_id = devid + '_' + yuid
    # These characters would break the tab-separated key/value output.
    if any(ch in ('\n', '\t', '\0') for ch in pair_id):
        return

    fields = reformat_features(pair_id, yfeat, dfeat)
    head, tail = fields[0], fields[1:]
    yield {'key': head, 'value': '\t'.join(tail), '@table_index': 0}


# ----- CLASSIFY -----
def run_classification(src_table, dst_table):
    """Run the classifier binary over `src_table`, writing to `dst_table`.

    The binary path and the model path are read from the MX_OPS_BIN and
    INDEVICE_UNPERFECT_MODEL environment variables.  Both files are passed
    to `config.MR_EXEC` via "-file", and the map command refers to them by
    base name: "./<binary> calc -s 1 <model>".

    Raises AttributeError if either environment variable is unset
    (`os.getenv` returns None in that case).
    """
    import sh

    # The config.MX_OPS_BIN / config.INDEVICE_UNPERFECT_MODEL settings are
    # not used here; the values come from the environment.
    mx_ops_bin = os.getenv('MX_OPS_BIN')
    model = os.getenv('INDEVICE_UNPERFECT_MODEL')
    mr_exec = sh.Command(config.MR_EXEC)

    server = config.MR_SERVER
    # Only the part after the last '/' is used inside the map command.
    binary_name = mx_ops_bin.rpartition('/')[2]
    model_name = model.rpartition('/')[2]
    map_command = "./%s calc -s 1 %s" % (binary_name, model_name)

    mr_exec("-server", server,
            "-map", map_command,
            "-src", src_table,
            "-dst", dst_table,
            "-file", mx_ops_bin,
            "-file", model)


def filter_classified(rec):
    """Mapper: keep only rows whose classifier score is strictly positive.

    rec['value'] is parsed as a float.  For a positive score the
    '<devid>_<yuid>' key is split on its last '_' and one row is emitted
    with the device id as 'key', the yuid as 'subkey', and a value string
    that embeds the original score text.
    """
    score = rec['value']
    # Written as "not (x > 0)" so that NaN is rejected as well.
    if not float(score) > 0:
        return
    devid, yuid = rec['key'].rsplit('_', 1)
    yield {
        'key': devid,
        'subkey': yuid,
        'value': "type=mn\tprob=%s\tperfect=0" % score,
    }


def map_features(rec):
    """Mapper: re-key a device row by its crypta device id.

    Rows with a missing or empty 'features' field are dropped; for the
    rest, {'key': rec[ids.CRYPTA_DEVICE_ID], 'features': ...} is emitted.
    """
    features = rec.get('features')
    if not features:
        return
    yield {'key': rec[ids.CRYPTA_DEVICE_ID], 'features': features}


# ----- TASK -----
def run(date, out_table):
    """Build candidate features, classify them and write `out_table`.

    Steps, all on the YT cluster selected by `config.MR_SERVER`:
      1. split the device/yuid pair tables into yuid-keyed and device-keyed
         candidate tables and re-key the device feature tables;
      2. attach yuid features and device features to the candidates;
      3. join both feature sets per device id ('pkey') into classifier input;
      4. run the classifier and keep the positively scored pairs.

    Scratch tables are created under '<INDEVICE_YT_FOLDER>tmp/' and are
    removed at the end; they are left in place if any step raises.
    `date` is accepted but not referenced in the body.
    """
    yt.config.set_proxy(config.MR_SERVER)

    indev_folder = config.INDEVICE_YT_FOLDER
    graph_folder = config.GRAPH_YT_OUTPUT_FOLDER

    # Scratch directory; the tables below are named '<tmp_dir>_<name>'.
    tmp_root = indev_folder + 'tmp'
    mkdir_p(tmp_root)
    tmp_dir = tmp_root + '/'

    y_candidates = tmp_dir + "_y_candidates"
    d_candidates = tmp_dir + "_d_candidates"
    d_features_merged = tmp_dir + "_d_features_merged"
    yfeat_table = tmp_dir + "_yfeat"
    dfeat_table = tmp_dir + "_dfeat"
    features_table = tmp_dir + "_features"
    classified_table = tmp_dir + "_classified"

    store_days = int(config.STORE_DAYS)
    pair_tables = mr.get_date_tables(indev_folder, 'fuzzy/device_yuid_fuzzy_pairs_day', store_days)
    y_feature_tables = mr.get_date_tables(indev_folder, 'fuzzy/yuid_features', store_days)
    d_feature_tables = mr.get_date_tables(graph_folder, 'mobile/dev_info_yt', store_days)

    reduce_memory = 512 * 1024 * 1024

    # 1. Candidates and re-keyed device features.
    yt.run_map(prepare_candidates, pair_tables, [y_candidates, d_candidates])
    yt.run_map(map_features, d_feature_tables, d_features_merged)

    # 2a. Yuid features for every candidate yuid.
    yt.run_sort(y_candidates, sort_by='key')
    yt.run_reduce(add_features_to_candidates(TYuidFeatures, "yfeat"),
                  [y_candidates] + y_feature_tables,
                  yfeat_table,
                  reduce_by='key', memory_limit=reduce_memory)

    # 2b. Device features for every candidate device.
    mr.sort_all([d_candidates, d_features_merged], sort_by='key')
    yt.run_reduce(add_features_to_candidates(TDevFeatures, "dfeat"),
                  [d_candidates, d_features_merged],
                  dfeat_table,
                  reduce_by='key', memory_limit=reduce_memory)

    # 3. Join both feature sets on the device id.
    yt.run_sort(yfeat_table, sort_by='pkey')
    yt.run_sort(dfeat_table, sort_by='pkey')
    yt.run_reduce(collect_features, [yfeat_table, dfeat_table], features_table, reduce_by='pkey')

    # 4. Classify and filter.
    run_classification(features_table, classified_table)
    yt.run_map(filter_classified, classified_table, out_table)

    # cleanup
    scratch_tables = (
        d_features_merged,
        y_candidates,
        d_candidates,
        yfeat_table,
        dfeat_table,
        features_table,
        classified_table,
    )
    for scratch in scratch_tables:
        yt.remove(scratch)


class IndeviceClassifyTask(base_luigi_task.BaseTask):
    """Luigi task wrapping `run` for a single `date`.

    The result table is
    '<INDEVICE_YT_FOLDER><date>/fuzzy/dev_yuid_unperfect_ml'.
    """
    date = luigi.Parameter()

    def __init__(self, *args, **kwargs):
        super(IndeviceClassifyTask, self).__init__(*args, **kwargs)
        date_folder = config.INDEVICE_YT_FOLDER + self.date
        self.out_table = date_folder + "/fuzzy/dev_yuid_unperfect_ml"

    def requires(self):
        """Yield three upstream tasks for each date from `get_dates_before`."""
        # Imported inside the method rather than at module level
        # (presumably to avoid import cycles - TODO confirm).
        from data_imports.import_logs.app_metrica_day import ImportAppMetrikaDayTask
        from matching.device_matching.fuzzy_heuristic.match_by_ip_day import DeviceYuidsFuzzyIpMatchingDayTask
        upstream = (
            PrepareIndeviceFeaturesDayTask,
            DeviceYuidsFuzzyIpMatchingDayTask,
            ImportAppMetrikaDayTask,
        )
        for dt in utils.get_dates_before(self.date, int(config.STORE_DAYS)):
            for task_cls in upstream:
                yield task_cls(date=dt, run_date=self.date)

    def run(self):
        """Run the module-level `run` for this task's date and output table."""
        run(self.date, self.out_table)

    def output(self):
        """Return the single YT target for the output table."""
        return [yt_luigi.YtTarget(self.out_table)]
