#-*- coding: UTF-8 -*-
# Presumably day-window sizes (judging by the names only).
# NOTE(review): neither constant is referenced in the visible part of this
# file - confirm whether they are still needed.
FUTURE_DAYS_COUNT = 7
PAST_DAYS_COUNT = 28

import requests
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
import datetime
import yt.wrapper as yt
import json
import os

def push_to_stat_new(report_table, report=None):
    """Publish a YT table to a daily-scale Statface report.

    report_table -- path of the table handed to ``remote_publish``.
    report       -- Statface report path passed to ``StatfaceReport.path``.

    The Statface token is read from the ``STAT_TOKEN`` environment variable,
    so a missing variable raises ``KeyError``.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username='robot_msvvitaly',
        token=os.environ['STAT_TOKEN'],
    )

    daily_report = ns.StatfaceReport().path(report).scale('daily').client(stat_client)
    # Publication is synchronous (async_mode=False) and the report config is
    # not uploaded (upload_config=False).
    daily_report.remote_publish(
        proxy='banach',
        table_path=report_table,
        async_mode=False,
        upload_config=False,
    )

def recommendations_reformatter(recs):
    """Mapper: flatten raw recommendation records into per-uid records.

    Each input record must carry a JSON string in ``value`` with the layout
    ``{"netflix": {"categories": [{"type": <int>, "queries": [{"ontoid": ...}]}]}}``.
    For every uid one ``Record(uid, results)`` is yielded, where ``results``
    is a JSON object mapping a category name to a list of
    ``{"object_id": <ontoid>}`` items.

    The uids are taken from ``aliases`` when the record key contains
    ``"cid_"``, otherwise the key itself is the only uid.
    An unknown category type raises ``KeyError``.
    """
    # NOTE(review): function-local import, presumably so the mapper stays
    # self-contained when it is shipped to the cluster - confirm.
    import json

    category_names = {
        0: "CATEG_FILM",
        1: "CATEG_SERIES",
        2: "CATEG_ANIM_FILM",
        3: "CATEG_ANIM_SERIES",
        4: "CATEG_TV_SHOW",
        5: "CATEG_MIXED",
        6: "CATEG_WATCHED",
    }

    for record in recs:
        payload = json.loads(record["value"])

        by_category = {}
        for category in payload["netflix"]["categories"]:
            # A later category of the same type overwrites an earlier one.
            by_category[category_names[category["type"]]] = [
                {"object_id": query["ontoid"]} for query in category["queries"]
            ]

        if "cid_" in record["key"]:
            target_uids = list(record["aliases"])
        else:
            target_uids = [record["key"]]

        serialized = json.dumps(by_category)
        for uid in target_uids:
            yield Record(uid=uid, results=serialized)

class upgradeability_calc(object):
    """Mapper measuring how much recommendations changed between two dates.

    Input records must have a ``results`` field (JSON object mapping a
    category name to a list of ``{"object_id": ...}`` items) and may have a
    ``prev_results`` field of the same shape.  When ``prev_results`` is
    absent or empty (e.g. the left join found no previous record), the
    cold-start recommendations given to the constructor are used instead.

    For every category of ``results`` one record is yielded with:

    * ``category_type``        -- the category name;
    * ``upgradability_top_20`` -- percentage of the 20 top slots whose object
      is not among the previous top 20;
    * ``upgradability``        -- percentage of all current objects that are
      not among the previous objects.

    Categories without current objects are skipped, because the share of
    changed objects is undefined for them.
    """

    def __init__(self, cold_start):
        """Store the fallback recommendations (dict: category -> items, or None)."""
        self.cold_start = cold_start

    def __call__(self, recs):
        for rec in recs:
            results = json.loads(rec["results"])

            # Look at the record's own field; testing the "results" JSON
            # string for the substring "prev_results" never finds the joined
            # column.
            prev_raw = rec.get("prev_results")
            if prev_raw:
                prev_results = json.loads(prev_raw)
            else:
                prev_results = self.cold_start or {}

            for category in results:
                predicted_objects = [item["object_id"] for item in results[category]]
                if not predicted_objects:
                    # Avoid dividing by zero below.
                    continue

                # A category missing from the previous recommendations means
                # that everything in it is new.
                predicted_prev_objects = [
                    item["object_id"] for item in prev_results.get(category, [])
                ]

                top_overlap = len(
                    set(predicted_objects[:20]).intersection(predicted_prev_objects[:20])
                )
                full_overlap = len(
                    set(predicted_objects).intersection(predicted_prev_objects)
                )

                # NOTE: the top-20 metric is always normalised by 20, even if
                # fewer than 20 objects are recommended.
                upgradability_top_20 = 100. * (1. - float(top_overlap) / 20.)
                upgradability = 100. * (1. - float(full_overlap) / float(len(predicted_objects)))

                yield Record(category_type=category,
                             upgradability_top_20=upgradability_top_20,
                             upgradability=upgradability)

def grep_cold_start(recs):
    """Mapper: pass through only records whose ``key`` equals the string '0'.

    Yields ``Record(value, key)`` for every such record; all other fields of
    the input record are dropped.
    """
    cold_start_key = '0'
    for record in recs:
        if record.get('key') != cold_start_key:
            continue
        yield Record(value=record['value'], key='0')

# Pieces of a recommendations table path.  The full path is assembled as
# PREFIX + "/" + <YYYYMMDD> + SUFFIX + <version> + SUFFIX_SUFFIX.
RECOMMENDATIONS_PREFIX = '//home/videoindex/recommender/backup/vitrina'
RECOMMENDATIONS_SUFFIX = '/recommendations_merged.'
# Filter versions for which main() computes and publishes the metrics.
RECOMMENDATIONS_VERSIONS = ["filter_none", "filter_basic", "filter_family", "filter_tv_app", "filter_vh"]
RECOMMENDATIONS_SUFFIX_SUFFIX = '.json'
# Look-back distances in days: main() compares yesterday's tables with the
# tables dated this many days before yesterday.
DATES_RANGES = [7, 3]

# Statface report that receives every result table produced by main().
_UPGRADABILITY_REPORT = 'Video.All/Special/Vitrine/Vitrine upgradability metrics'


def _recommendations_table(date_str, version):
    """Return the path of the merged recommendations table for a day and filter version."""
    return (RECOMMENDATIONS_PREFIX + "/" + date_str +
            RECOMMENDATIONS_SUFFIX + version + RECOMMENDATIONS_SUFFIX_SUFFIX)


def _load_cold_start(cluster, source_table, cold_start_table):
    """Store the reformatted cold-start record of source_table and read it back.

    Returns the parsed ``results`` of the last record in cold_start_table, or
    None when the table turns out to be empty.
    """
    job = cluster.job()
    job.table(source_table) \
       .map(grep_cold_start) \
       .map(recommendations_reformatter) \
       .put(cold_start_table)
    job.run()

    cold_start = None
    for rec in cluster.driver.read(cold_start_table):
        cold_start = json.loads(rec['results'])
    return cold_start


def _calc_and_publish(job, current_recoms, prev_recoms, cold_start,
                      fielddate, version, date_range, result_table):
    """Join current with previous recommendations, aggregate the metrics and publish them.

    current_recoms / prev_recoms are streams of ``uid`` + ``results`` records
    built on ``job``.  The averaged metrics are written to result_table and
    pushed to the Statface report.
    """
    previous = prev_recoms.project(uid='uid', prev_results='results')
    current_recoms.join(previous, by='uid', type='left') \
                  .map(upgradeability_calc(cold_start)) \
                  .project(ne.all(),
                           fielddate=ne.const(fielddate),
                           version=ne.const(version),
                           date_range=ne.const(date_range)) \
                  .groupby('category_type', 'fielddate', 'version', 'date_range') \
                  .aggregate(upgradability=na.mean('upgradability'),
                             upgradability_top_20=na.mean('upgradability_top_20')) \
                  .put(result_table)
    job.run()
    push_to_stat_new(result_table, _UPGRADABILITY_REPORT)


def main():
    """Compute upgradability metrics for yesterday's recommendations and publish them.

    Waits until yesterday's ``filter_none`` table exists, then, for every
    look-back distance in DATES_RANGES, compares yesterday's recommendations
    with the ones from that many days earlier: first for the cold-start
    record only, then for a 1% sample of every version in
    RECOMMENDATIONS_VERSIONS.
    """
    cluster = clusters.Banach().env(parallel_operations_limit=10)
    date = datetime.datetime.now() - datetime.timedelta(days=1)
    date_str = date.strftime("%Y%m%d")
    fielddate = date.strftime("%Y-%m-%d")
    current_base_version = _recommendations_table(date_str, "filter_none")

    # Poll until the input for yesterday has been produced.
    while not cluster.driver.exists(current_base_version):
        print("No tables")
        time.sleep(100)

    print(date)

    for date_range in DATES_RANGES:
        prev_date = date - datetime.timedelta(days=date_range)
        prev_date_str = prev_date.strftime("%Y%m%d")
        print(prev_date_str)

        # Get the cold start for the previous date.
        base_version = _recommendations_table(prev_date_str, "filter_none")
        cold_start_table = '//tmp/mma-1523_' + prev_date_str + '_cold_start'
        cold_start = _load_cold_start(cluster, base_version, cold_start_table)

        # Calculate upgradability for the cold-start record itself.
        result_table = "//tmp/mma-1523/" + date_str + '_cold_start_' + str(date_range)
        job = cluster.job()
        current_recoms = job.table(current_base_version) \
                            .map(grep_cold_start) \
                            .map(recommendations_reformatter)
        prev_recoms = job.table(base_version) \
                         .map(grep_cold_start) \
                         .map(recommendations_reformatter)
        _calc_and_publish(job, current_recoms, prev_recoms, cold_start,
                          fielddate, "cold_start", date_range, result_table)

        # Join the recommendations tables of every version and calculate
        # upgradability on a 1% sample of the current table.
        for version in RECOMMENDATIONS_VERSIONS:
            print(version)
            result_table = "//tmp/mma-1523/" + date_str + '_' + version + '_' + str(date_range)
            job = cluster.job()
            current_recoms = job.table(_recommendations_table(date_str, version)) \
                                .random(fraction=0.01) \
                                .map(recommendations_reformatter)
            prev_recoms = job.table(_recommendations_table(prev_date_str, version)) \
                             .map(recommendations_reformatter)
            _calc_and_publish(job, current_recoms, prev_recoms, cold_start,
                              fielddate, version, date_range, result_table)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
