# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random
from collections import defaultdict


# Shared cluster handle: every job in this file is created from it, and
# main() also uses it for direct table reads and writes.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)


def data_adding(carousel_type, date):
    """Add ``puid`` and ``action_count`` columns to one day's result table.

    The table ``offline_results/<carousel_type>/result_<date>`` is read,
    left-joined with the Toloka worker export (``workerId`` -> ``uid``) and
    with the ``tolokers_warmth`` table, and the enriched rows are written
    back to the same ``result_<date>`` path.

    :param carousel_type: carousel name, used as a path component.
    :param date: date string, used as the table name suffix.
    """
    results_path = (
        '//home/videolog/vika-pavlova/video_recommendations/offline_results/'
        + carousel_type + '/result_' + date
    )

    job = cluster.job()

    toloka_results = job.table(results_path)
    puid_export = job.table('//home/toloka/prod/export/workers/puids')

    # Expose the warmth table's 'uid' column under the name 'puid' so that it
    # can serve as the join key below.
    warmth = job.table(
        '//home/videolog/vika-pavlova/video_recommendations/tolokers_warmth'
    ).project(
        "action_count",
        puid=ne.custom(lambda uid: uid, 'uid'),
    )

    results_with_puid = toloka_results.join(
        puid_export, by_left="workerId", by_right="worker_id", type='left'
    ).project(
        ne.all(exclude=('worker_id', 'uid')),
        # NOTE(review): for a worker missing from the export 'uid' is
        # presumably None, which str() turns into the text 'None' - confirm
        # that this is the intended puid for such rows.
        puid=ne.custom(lambda uid: str(uid), 'uid'),
    )

    results_with_puid.join(
        warmth, by='puid', type='left'
    ).project(
        ne.all(exclude='action_count'),
        # Any falsy action_count (presumably the missing value left by the
        # left join) is replaced with 0.
        action_count=ne.custom(lambda count: count or 0, 'action_count'),
    ).put(results_path)

    job.run()


class aggregate_results():
    """Reducer that folds each group of judgement rows into one record.

    For every group the emitted record carries the group key fields
    (``action_count``, ``puid``, ``workerId``) and a ``result`` dict that
    maps ``onto_id`` to the judged label plus the video attributes taken
    from the row's ``inputValues``.
    """

    def __init__(self, carousel_type):
        # Carousel name; its 'serp_' marker is removed to get the video type.
        self.carousel_type = carousel_type

    def __call__(self, groups):
        """Yield one aggregated Record per (key, rows) pair in ``groups``."""
        # Identical for every row, so it is derived once up front.
        video_type = self.carousel_type.replace('serp_', '')

        for key, rows in groups:
            judged = {}
            for row in rows:
                onto_id = row['inputValues']['onto_id']
                label = row['outputValues']['label']
                shown = row['inputValues']
                # Rows sharing an onto_id overwrite each other; the last
                # one in the group wins.
                judged[onto_id] = dict(
                    video_type=video_type,
                    label=label,
                    title=shown['title'],
                    embed_url=shown['embed_url'],
                    thumbnail=shown['thumbnail'],
                    description=shown['description'],
                )

            yield Record(
                action_count=key['action_count'],
                puid=key['puid'],
                workerId=key['workerId'],
                result=judged,
            )

def merge_results(carousel_type, date):
    """Attach per-worker aggregated judgements to both recommendation sets.

    The day's result table is grouped by ``action_count``, ``puid`` and
    ``workerId`` and reduced with ``aggregate_results``. The reduced table is
    then joined by ``workerId`` with the ``recoms`` and ``cold_recoms`` tables
    for the date, and each join is stored next to its source as
    ``results_<date>``.

    :param carousel_type: carousel name, used as a path component.
    :param date: date string, used as the table name (suffix).
    """
    recoms_prefix = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/recoms/' + carousel_type
    cold_prefix = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/cold_recoms/' + carousel_type

    job = cluster.job()

    recoms = job.table(recoms_prefix + '/' + date)
    cold_recoms = job.table(cold_prefix + '/' + date)
    results = job.table(
        '//home/videolog/vika-pavlova/video_recommendations/offline_results/' + carousel_type + '/result_' + date
    )

    per_worker = results.groupby(
        'action_count', 'puid', 'workerId'
    ).reduce(
        aggregate_results(carousel_type)
    )

    # Same join for both recommendation flavours; only the tables differ.
    for recommendations, prefix in ((recoms, recoms_prefix), (cold_recoms, cold_prefix)):
        per_worker.join(recommendations, by='workerId').put(prefix + '/results_' + date)

    job.run()


# Share of the total weight assigned to each warmth bucket, in bucket order:
# action == 0, action == 1, 1 < action < 10, everything else.
_WARMTH_BUCKET_SHARES = (0.7, 0.1, 0.15, 0.05)


def _warmth_bucket(action):
    """Return the index into _WARMTH_BUCKET_SHARES for an action count."""
    if action == 0:
        return 0
    if action == 1:
        return 1
    if action > 1 and action < 10:
        return 2
    # Catch-all bucket: 10 and above, plus any value that matched none of
    # the tests above (for example a negative count).
    return 3


def _assign_warmth_weights(users, excluded=frozenset()):
    """Store weight, total_weight and tolokers_count in every user's dict.

    Users listed in ``excluded`` are left out of the bucket counts and get
    zeros for all three fields. ``users`` is modified in place.
    """
    counts = [0] * len(_WARMTH_BUCKET_SHARES)
    for puid, info in users.items():
        if puid not in excluded:
            counts[_warmth_bucket(info['action'])] += 1

    population = sum(counts)

    # Per-user weight of a bucket: the bucket's share of the population
    # spread evenly over its members; an empty bucket weighs 0.
    weights = [
        (share * population) / count if count else 0
        for share, count in zip(_WARMTH_BUCKET_SHARES, counts)
    ]

    # Accumulated with plain left-to-right additions so the floating point
    # result does not depend on how the built-in sum() adds floats.
    total_weight = 0
    for weight, count in zip(weights, counts):
        total_weight += weight * count

    for puid, info in users.items():
        if puid in excluded:
            info['weight'] = 0
            info['total_weight'] = 0
            info['tolokers_count'] = 0
        else:
            info['weight'] = weights[_warmth_bucket(info['action'])]
            info['total_weight'] = total_weight
            info['tolokers_count'] = population


def calc_warmth(results):
    """Compute per-user warmth weights, with and without discounted users.

    Users are bucketed by their ``action`` value (0, 1, 2..9, other). Every
    non-empty bucket receives a fixed share of the total weight (0.7, 0.1,
    0.15, 0.05 of the user count) which is divided evenly among its users.

    A user is "discounted" when every entry of a row's ``toloka_result``
    has the label ``'+2'``; a row with an empty ``toloka_result`` also
    counts. In the discounted variant such users are excluded from the
    bucket counts and get zero ``weight``, ``total_weight`` and
    ``tolokers_count``.

    :param results: iterable of dicts with the keys ``puid``, ``action`` and
        ``toloka_result`` (a dict whose values contain a ``label``).
    :return: dict with ``users_info`` and ``users_info_discounted`` (both
        mapping puid to a dict with ``action``, ``weight``, ``total_weight``
        and ``tolokers_count``) and ``discount_users`` (a set of puids).
    """
    users_info = defaultdict(dict)
    users_info_discounted = defaultdict(dict)
    discount_users = set()

    for rec in results:
        puid = rec['puid']
        # When a puid occurs in several rows the last row's action is kept.
        users_info[puid]['action'] = rec['action']
        users_info_discounted[puid]['action'] = rec['action']

        # all() is True for an empty toloka_result, so a row without any
        # judgements marks its user as discounted as well.
        if all(value['label'] == '+2' for value in rec['toloka_result'].values()):
            discount_users.add(puid)

    _assign_warmth_weights(users_info)
    _assign_warmth_weights(users_info_discounted, discount_users)

    return {'users_info': users_info,
            'users_info_discounted': users_info_discounted,
            'discount_users': discount_users
           }


def serps_gathering(results):
    """Convert joined recommendation rows into SERP dicts with judgements.

    One SERP dict is produced per input row. Its query text is the row's
    ``puid``; the warmth info computed by ``calc_warmth`` for that user is
    attached as JSON in the ``query_param.*`` fields. Only components whose
    ``onto_id`` has an entry in the row's ``toloka_result`` are kept, and
    each kept component carries the judged label and video type.

    Items without an ``onto_id`` or without a judgement are skipped before
    any other field is read. A kept item with no thumbnail URLs gets an
    empty image URL instead of raising IndexError.

    :param results: list of dicts with the keys ``puid``, ``action``,
        ``toloka_result`` and ``components``.
    :return: list of SERP dicts, in the order of ``results``.
    :raises KeyError: if a kept component has no ``componentUrl``.
    """
    # A single timestamp is shared by all judgements of this run.
    current_ts = int(time.time())

    users_dict = calc_warmth(results)

    serps = []
    for res in results:
        user = res['puid']
        judgements = res['toloka_result']

        components = []
        for item in res['components']:
            onto_id = item.get('onto_id')
            # Decide first whether the item is kept, so that unjudged items
            # never have to provide thumbnails or URLs.
            if onto_id is None or onto_id not in judgements:
                continue

            thumbadd = item.get('thumbadd') or {}
            urls = thumbadd.get('urls') or []
            # The first thumbnail URL is used for every image field.
            image_url = urls[0] if urls else ''

            judgement = judgements[onto_id]
            components.append({
                'type': 'COMPONENT',
                'componentInfo': {
                    'type': 1
                },
                'text.cardType': 'video',
                'text.cardId': onto_id,
                'text.title': item.get('title', ''),
                'text.snippet': item.get('description', ''),
                'componentUrl': item['componentUrl'],
                'imageadd': {'url': image_url},
                'url.imageUrl': image_url,
                'url.videoThumbHref': image_url,
                'thumbadd': thumbadd,
                'judgements.collections_feed_pointwise_label': {
                    'scale': 'collections_feed_pointwise_label',
                    'name': judgement['label'],
                    'ts': current_ts,
                },
                'judgements.content_type': {
                    'scale': 'content_type',
                    'name': judgement['video_type'],
                    'ts': current_ts,
                },
            })

        serps.append({
            'type': 'SERP',
            'query': {
                'text': user,
                'regionId': 225,
                'device': 0,
                'country': 'RU'
            },
            'query_param.action_count': json.dumps(users_dict['users_info'][user], ensure_ascii=False, indent=4).encode('utf-8'),
            'query_param.action_count_discounted': json.dumps(users_dict['users_info_discounted'][user], ensure_ascii=False, indent=4).encode('utf-8'),
            'query_param.discount_filter': 'yes' if user in users_dict['discount_users'] else 'no',
            'components': components,
        })

    return serps


def main():
    """Run the whole pipeline for one date and carousel type.

    Steps: enrich the result table (``data_adding``), join it with both
    recommendation sets (``merge_results``), then for the regular and the
    cold-start joins read the rows, build SERPs with ``serps_gathering`` and
    write them to the ``serps`` directory as ``prod_<date>`` and
    ``cold_start_<date>`` respectively.

    Command line arguments: ``--date`` and ``--carousel_type``, both
    required strings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--date', type=str, required=True)
    parser.add_argument('--carousel_type', type=str, required=True)
    args = parser.parse_args()

    data_adding(args.carousel_type, args.date)
    merge_results(args.carousel_type, args.date)

    # (directory holding the joined table, suffix of the SERP table name);
    # the regular recommendations are processed before the cold-start ones.
    sources = (
        ('//home/videolog/vika-pavlova/video_recommendations/offline_recomms/recoms/', '/prod_'),
        ('//home/videolog/vika-pavlova/video_recommendations/offline_recomms/cold_recoms/', '/cold_start_'),
    )

    for joined_root, serp_suffix in sources:
        rows = [
            {
                'puid': rec['puid'],
                "workerId": rec["workerId"],
                "action": rec["action_count"],
                "toloka_result": rec["result"],
                'components': rec["components"],
            }
            for rec in cluster.read(joined_root + args.carousel_type + '/results_' + args.date)
        ]

        serps = serps_gathering(rows)

        cluster.write(
            '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serps/' + args.carousel_type + serp_suffix + args.date,
            [Record(**serp) for serp in serps]
        )


# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
