# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared nile handle for the YT Hahn cluster, running in the 'vika-pavlova'
# pool. Every job in this file is created from it via ``cluster.job()``.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)


def _first_thumbnail(images):
    """Return the first 'url' found among image entries, else 'no_thumb'."""
    for image in images:
        url = image.get('url', 'no_thumb')
        if url != 'no_thumb':
            return url
    return 'no_thumb'


def _first_ru_value(entries, fallback):
    """Return 'value' of the first entry whose RelevLocale contains 'ru'.

    Entries without a 'RelevLocale' containing 'ru', or without a 'value',
    are skipped; ``fallback`` is returned when nothing matches.
    """
    for entry in entries:
        if 'RelevLocale' in entry and 'ru' in entry['RelevLocale']:
            candidate = entry.get('value', fallback)
            if candidate != fallback:
                return candidate
    return fallback


def _pick_embed_url(ids):
    """Pick a url from the ids list, or 'no_url' when the list is empty.

    Starts from the first entry's 'value' and is overridden by every entry
    whose value contains 'kinopoisk', so the last such entry wins.
    """
    if not ids:
        return 'no_url'
    chosen = ids[0].get('value', 'no_url')
    for entry in ids:
        candidate = entry.get('value', 'no_url')
        if 'kinopoisk' in candidate:
            chosen = candidate
    return chosen


def _label_for(rec):
    """Map (action_type, assessment) of a record to a label in {-2, 0, 2}.

    action_type 1: assessment 4/5 -> 2, 1/2 -> -2, anything else -> 0.
    action_type 2: 0.5..2 -> -2, 3.5..5 -> 2, anything else -> 0.
    Any other action_type -> -2 (assessment is not consulted).
    """
    action_type = rec['action_type']
    if action_type == 1:
        score = rec['assessment']
        if score in (4, 5):
            return 2
        if score in (1, 2):
            return -2
        return 0
    if action_type == 2:
        score = rec['assessment']
        if score in (3.5, 4, 4.5, 5):
            return 2
        if score in (0.5, 1, 1.5, 2):
            return -2
        return 0
    return -2


def parse_onto_base(recs):
    """Mapper: flatten joined action/card records into like records.

    For every input record the JSON stored in ``rec["value"]`` is parsed and
    one Record is yielded with thumbnail, title, description, embed_url,
    ugc_type and avg_label, together with puid, workerId, assessment,
    action_type and onto_id (taken from ``rec['key']``).

    Pieces that cannot be extracted are replaced by the sentinel strings
    'no_thumb', 'no_title', 'no_description', 'no_url' and 'no_type'
    (avg_label falls back to 0), so downstream code can filter them out.
    """
    for rec in recs:
        card = json.loads(rec["value"])

        thumbnail = _first_thumbnail(card.get('Image', []))
        title = _first_ru_value(card.get('Title', []), 'no_title')

        isa = card.get('isa', {})
        description = _first_ru_value(isa.get('Defin', []), 'no_description')

        embed_url = _pick_embed_url(card.get('ids', []))

        ugc_types = isa.get('ugc_type', [])
        ugc_type = ugc_types[0] if ugc_types else 'no_type'

        avg_label = _label_for(rec)

        yield Record(
            puid=rec.puid,
            workerId=rec.workerId,
            thumbnail=thumbnail,
            title=title,
            description=description,
            embed_url=embed_url,
            ugc_type=ugc_type,
            avg_label=avg_label,
            assessment=rec.assessment,
            action_type=rec["action_type"],
            onto_id=rec['key'],
        )
def serp_likes():
    """Build the SERP like tables and their video-hosting ("entity") variants.

    Steps, all inside a single nile job:

    1. Read recommender actions, keep ``action_type`` and rename ``id`` ->
       ``puid`` (with the 'p/' prefix removed), ``to_id`` -> ``key``,
       ``value`` -> ``assessment``.
    2. Join them with the current tolokers by ``puid`` and with the onto
       base cards by ``key``, then run ``parse_onto_base`` and drop every
       row that still carries a sentinel value or a zero ``avg_label``.
    3. Write the result to ``serp_likes/serp_mixed`` and
       ``all_likes/serp_mixed``, plus one table per ugc_type in both
       directories.
    4. Join the labelled rows with ``actual_urls.full`` by ``onto_id``,
       keep for each (onto_id, puid, ugc_type, workerId, avg_label) only the
       rows with the minimal title, and write the ``entity*`` tables.
    """
    print('serp_likes')

    job = cluster.job()

    offline_root = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms'
    serp_dir = offline_root + '/serp_likes/'
    all_dir = offline_root + '/all_likes/'

    ugc = job.table('//home/dict/recommender/actions/latest').project(
        "action_type",
        puid=ne.custom(lambda x: x.replace('p/', ''), 'id'),
        key=ne.custom(lambda x: x, "to_id"),
        assessment=ne.custom(lambda x: x, "value"),
    )

    tolokers = job.table(
        '//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
    ).project(
        'workerId',
        puid=ne.custom(lambda x: str(x), 'puid'),
    )

    onto_base = job.table('//home/dict/ontodb/ver/daily/production/all_cards_final')

    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full').project(
        'UUID',
        title=ne.custom(lambda x: x, 'ComputedName'),
        thumbnail=ne.custom(
            lambda x: x.get('thumbnail', 'no_thumbnail') if x else 'no_thumbnail',
            'Resources'),
        description=ne.custom(lambda x: x, 'Comment'),
        embed_url=ne.custom(lambda x: 'https://' + x, 'GroupingUrl'),
        ya_video_preview=ne.custom(
            lambda x: x.get('ya_video_preview', 'no_preview') if x else 'no_preview',
            'Resources'),
        # Guard against an empty Resources value the same way the other
        # Resources-based extractors do; without it such a row raises
        # AttributeError here before the filter below can discard it.
        onto_id=ne.custom(
            lambda x: x.get("onto_id", 'none') if x else 'none',
            'Resources'),
    ).filter(
        sf.and_(
            sf.custom(lambda x: x != 'no_thumbnail', 'thumbnail'),
            sf.custom(lambda x: x != 'no_preview', 'ya_video_preview'),
        )
    )

    raw = ugc.join(tolokers, by='puid').join(onto_base, by='key')

    # serp likes: keep only fully populated rows with a non-neutral label
    final = raw.map(parse_onto_base).filter(
        sf.and_(
            sf.custom(lambda x: x != 'no_thumb', 'thumbnail'),
            sf.custom(lambda x: x != 'no_title', 'title'),
            sf.custom(lambda x: x != 'no_description', 'description'),
            sf.custom(lambda x: x != 'no_url', 'embed_url'),
            sf.custom(lambda x: x != 'no_type', 'ugc_type'),
            sf.custom(lambda x: x != 0, 'avg_label'),
        )
    ).put(serp_dir + 'serp_mixed')

    final.put(all_dir + 'serp_mixed')

    # (ugc_type value, output table name); each subset is filtered once and
    # written to both the serp_likes and all_likes directories.
    serp_tables_by_type = (
        ('film', 'serp_film'),
        ('series', 'serp_series'),
        ('anim-series', 'serp_anim_series'),
        ('anim', 'serp_anim_film'),
    )
    for ugc_type, table_name in serp_tables_by_type:
        final.filter(
            sf.equals('ugc_type', ugc_type)
        ).put(serp_dir + table_name).put(all_dir + table_name)

    # serp vh likes: attach video-hosting metadata by onto_id
    t = final.project(
        'onto_id', "puid", 'ugc_type', "workerId", "avg_label"
    ).join(actual_urls, by='onto_id')

    # Minimal title per (onto_id, puid, ugc_type, workerId, avg_label) ...
    t1 = t.groupby(
        'onto_id', "puid", 'ugc_type', "workerId", "avg_label"
    ).aggregate(title=na.min('title'))

    # ... and joining back on the same keys plus title keeps only those rows.
    vh = t.join(
        t1, by=('onto_id', "puid", 'ugc_type', "workerId", 'title', "avg_label")
    ).put(serp_dir + 'entity_mixed')

    entity_tables_by_type = (
        ('film', 'entity'),
        ('series', 'entity_series'),
        ('anim-series', 'anim_series_entity'),
        ('anim', 'anim_entity'),
    )
    for ugc_type, table_name in entity_tables_by_type:
        vh.filter(sf.equals('ugc_type', ugc_type)).put(serp_dir + table_name)

    job.run()


def likes_dislikes(start, end):
    """Build the efir like/dislike tables from video reactions.

    Reads ``sociality_squeeze/reaction_{start..end}``, keeps "reaction on
    video" events of type like/dislike, maps them to +1/-1 and averages them
    per (puid, UUID). The result is joined with the current tolokers by
    ``puid`` and with ``actual_urls.full`` by ``UUID``, then written once
    unfiltered (``efir``) and once per category filter. The columns used only
    for routing rows into categories are excluded from every output.

    :param start: first table suffix of the range, as a string
    :param end: last table suffix of the range, as a string
    """
    print('likes_dislikes')

    job = cluster.job()

    table_range = start + '..' + end
    reactions = job.table('//home/videolog/sociality_squeeze/reaction_{' + table_range + '}')

    puids = job.table(
        '//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
    ).project(
        'workerId',
        puid=ne.custom(lambda value: str(value), 'puid'),
    )

    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full').project(
        'UUID', 'IsMusic',
        title=ne.custom(lambda value: value, 'ComputedName'),
        thumbnail=ne.custom(
            lambda res: res.get('thumbnail', 'no_thumbnail') if res else 'no_thumbnail',
            'Resources'),
        description=ne.custom(lambda value: value, 'Comment'),
        embed_url=ne.custom(lambda value: 'https://' + value, 'GroupingUrl'),
        ya_video_preview=ne.custom(
            lambda res: res.get('ya_video_preview', 'no_preview') if res else 'no_preview',
            'Resources'),
        onto_otype=ne.custom(
            lambda res: res.get("onto_otype", 'none') if res else 'none',
            'Resources'),
        blogger_id=ne.custom(
            lambda res: res.get("blogger_id", 'none') if res else 'none',
            'Resources'),
        tags=ne.custom(
            lambda res: res.get("carusel_detailed_tags", 'none') if res else 'none',
            'Resources'),
    )

    is_video_reaction = sf.and_(
        sf.equals("action_type", "reaction on video"),
        sf.custom(lambda kind: kind in ["like", "dislike"], "reaction_type"),
    )
    filtered = reactions.filter(is_video_reaction).project(
        'puid',
        UUID=ne.custom(lambda value: value, "uuid"),
        action=ne.custom(lambda kind: 1 if kind == "like" else -1, "reaction_type"),
    )

    aggr = filtered.groupby('puid', "UUID").aggregate(
        avg_label=na.mean('action'),
        assessment_count=na.count(),
    )

    final = aggr.join(puids, by='puid').join(actual_urls, by='UUID')

    # Columns needed only to decide which category a row belongs to.
    routing_columns = ('onto_otype', 'blogger_id', 'IsMusic', 'tags')

    # Unfiltered table first, then one table per category filter.
    final.project(
        ne.all(exclude=routing_columns)
    ).put('//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/efir')

    category_outputs = [
        (sf.equals('onto_otype', "Film/Film"),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity'),
        (sf.equals('onto_otype', "Film/Series@on"),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity_series'),
        (sf.not_(sf.equals('blogger_id', "none")),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/bloggers'),
        (sf.equals('IsMusic', True),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/music'),
        (sf.contains('tags', 'sport'),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/sport'),
        (sf.contains('tags', 'anim_film'),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/anim_entity'),
        (sf.contains('tags', 'anim_series'),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/anim_series_entity'),
        (sf.custom(lambda otype: otype in ["Film/Film", "Film/Series@on"], 'onto_otype'),
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity_mixed'),
    ]
    for row_filter, target in category_outputs:
        final.filter(row_filter).project(
            ne.all(exclude=routing_columns)
        ).put(target)

    job.run()


def join_vh_likes():
    """Merge the serp and efir entity tables into the ``all_likes`` directory.

    For each of the five entity categories the serp_likes and efir_likes
    tables are concatenated and written under ``all_likes``. The bloggers,
    music and sport tables exist only on the efir side and are written to
    ``all_likes`` unchanged.
    """
    print('join_vh_likes')

    job = cluster.job()

    # (category label, input path) in the order the tables are opened.
    serp_inputs = [
        ('mixed', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/entity_mixed'),
        ('films', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/entity'),
        ('series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/entity_series'),
        ('anim', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/anim_entity'),
        ('anim_series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/anim_series_entity'),
    ]
    efir_inputs = [
        ('films', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity'),
        ('series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity_series'),
        ('anim', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/anim_entity'),
        ('anim_series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/anim_series_entity'),
        ('mixed', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/entity_mixed'),
        ('bloggers', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/bloggers'),
        ('music', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/music'),
        ('sport', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/efir_likes/sport'),
    ]

    serp = {}
    for label, path in serp_inputs:
        serp[label] = job.table(path)

    efir = {}
    for label, path in efir_inputs:
        efir[label] = job.table(path)

    # Categories present on both sides: concatenate serp + efir.
    merged_outputs = [
        ('mixed', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/entity_mixed'),
        ('films', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/entity'),
        ('series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/entity_series'),
        ('anim', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/anim_entity'),
        ('anim_series', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/anim_series_entity'),
    ]
    for label, target in merged_outputs:
        job.concat(serp[label], efir[label]).put(target)

    # Efir-only categories: copied as they are.
    copied_outputs = [
        ('bloggers', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/bloggers'),
        ('music', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/music'),
        ('sport', '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/sport'),
    ]
    for label, target in copied_outputs:
        efir[label].put(target)

    job.run()


def main():
    """Parse the command line and run the three pipeline stages in order.

    All three options are required strings. ``--start_date`` and
    ``--end_date`` are forwarded to ``likes_dislikes``; ``--carousel_type``
    is parsed but not read anywhere in this function.
    """
    parser = argparse.ArgumentParser()
    for option in ('--start_date', '--end_date', '--carousel_type'):
        parser.add_argument(option, type=str, required=True)
    args = parser.parse_args()

    serp_likes()
    likes_dislikes(args.start_date, args.end_date)
    join_vh_likes()


# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
