# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared Nile handle for the Hahn YT cluster; every job in this file is created
# from it via `cluster.job()`. The `job_root` template is the base directory
# that the '$job_root/...' table paths used below are resolved against.
# NOTE(review): `parallel_operations_limit` presumably caps how many YT
# operations run concurrently -- confirm against the Nile documentation.
cluster = clusters.yt.Hahn(pool='vika-pavlova'
      ).env(templates=dict(job_root='//home/videolog/vika-pavlova/video_recommendations/offline_recomms/new_cold_honeypots'
                          ),
            yt_spec_defaults=dict(pool_trees=["physical"],
                                  tentative_pool_trees=["cloud"]),
            parallel_operations_limit=10
           )


def parse_onto_base(recs):
    """Mapper: turn joined ontology-card records into flat display records.

    Each incoming record must provide ``value`` (a JSON string with the card),
    ``key``, ``assessment`` and ``reqs``.  For every record one ``Record`` is
    yielded with ``thumbnail``, ``title``, ``description``, ``embed_url`` and
    ``ugc_type`` extracted from the card, plus ``assessment``, ``reqs`` and
    ``onto_id`` (taken from ``key``).  Missing pieces are replaced by the
    placeholders ``no_thumb``, ``no_title``, ``no_description``, ``no_url``
    and ``no_type``.
    """

    def first_ru_value(entries, placeholder):
        # First 'value' of an entry whose RelevLocale contains 'ru'; entries
        # without a usable value are skipped.
        for entry in entries:
            if 'RelevLocale' in entry and 'ru' in entry['RelevLocale']:
                candidate = entry.get('value', placeholder)
                if candidate != placeholder:
                    return candidate
        return placeholder

    for rec in recs:
        card = json.loads(rec["value"])
        isa = card.get('isa', {})

        # thumbnail: first image that actually carries a url
        image_urls = (image.get('url', 'no_thumb') for image in card.get('Image', []))
        thumbnail = next((url for url in image_urls if url != 'no_thumb'), 'no_thumb')

        # title / description: first Russian-locale value
        title = first_ru_value(card.get('Title', []), 'no_title')
        description = first_ru_value(isa.get('Defin', []), 'no_description')

        # embed_url: default to the first id, but let a kinopoisk id override
        # it (when several match, the last one wins)
        embed_url = 'no_url'
        ids = card.get('ids', [])
        if ids:
            embed_url = ids[0].get('value', 'no_url')
            for entry in ids:
                candidate = entry.get('value', 'no_url')
                if 'kinopoisk' in candidate:
                    embed_url = candidate

        # type: first declared ugc_type, if any
        ugc_types = isa.get('ugc_type', [])
        ugc_type = ugc_types[0] if ugc_types else 'no_type'

        yield Record(
            thumbnail=thumbnail,
            title=title,
            description=description,
            embed_url=embed_url,
            ugc_type=ugc_type,
            assessment=rec.assessment,
            onto_id=rec['key'],
            reqs=rec.reqs,
        )


def _serp_put_top_by_type(job, stream, type_dirs, mixed_dir, suffix, excluded):
    """Write the per-type and the mixed top-500 tables for one stream.

    For every ``(ugc_type, directory)`` pair in ``type_dirs`` the rows of
    ``stream`` with that ``ugc_type`` are cut to the top 500 by ``reqs``,
    stripped of the ``excluded`` columns and written to
    ``$job_root/<directory>/<suffix>``.  The per-type results are then
    concatenated, cut to the top 500 by ``reqs`` again and written to
    ``$job_root/<mixed_dir>/<suffix>``.
    """
    per_type = []
    for ugc_type, directory in type_dirs:
        per_type.append(
            stream.filter(sf.equals('ugc_type', ugc_type))
            .top(500, by='reqs')
            .project(ne.all(exclude=excluded))
            .put('$job_root/%s/%s' % (directory, suffix))
        )

    # `reqs` must survive in every per-type stream because the mixed table is
    # ranked by it.
    (job.concat(*per_type)
        .top(500, by='reqs')
        .project(ne.all(exclude=excluded))
        .put('$job_root/%s/%s' % (mixed_dir, suffix)))


def serp_likes():
    """Build the SERP-based "good"/"bad" cold tables and their video-hosting twins.

    Steps, all inside one Nile job:

    * ``$job_root/ugc``: recommender actions from the last 180 days with
      ``action_type`` in (1, 2) and an assessment of 0.5, 1 or 5.
    * ``$job_root/avg``: mean assessment per ``onto_id`` over all actions.
    * ``good``: cards rated 5 whose mean assessment is above 4;
      ``bad``: cards rated 0.5 or 1 whose mean assessment is below 1.  Both
      keep only cards that have a title, a description and a thumbnail.
    * For each of them, top-500-by-``reqs`` tables per ``ugc_type`` plus a
      mixed table under ``serp_*``.
    * The same again after joining with ``actual_urls.full`` by ``onto_id``,
      written under ``entity*``.
    """

    print('serp_likes')

    # Local midnight of the current day minus 180 days, as a unix timestamp.
    today_midnight = time.mktime(datetime.date.today().timetuple())
    six_month_ts = today_midnight - 2592000 * 6

    serp_dirs = (
        ('film', 'serp_film'),
        ('series', 'serp_series'),
        ('anim', 'serp_anim_film'),
        ('anim-series', 'serp_anim_series'),
    )
    vh_dirs = (
        ('film', 'entity'),
        ('series', 'entity_series'),
        ('anim', 'anim_entity'),
        ('anim-series', 'anim_series_entity'),
    )
    serp_excluded = ("assessment", "ugc_type")
    vh_excluded = ("ugc_type", 'onto_id')

    job = cluster.job()

    ugc = job.table('//home/dict/recommender/actions/latest'
               ).project("action_type", 'timestamp',
                         key = ne.custom(lambda x: x, "to_id"),
                         assessment = ne.custom(lambda x: x, "value")
                        ).filter(sf.and_(sf.custom(lambda x, y: x in [1, 2] and y in [0.5, 1, 5],
                                          'action_type', 'assessment'),
                                         sf.custom(lambda x: x > six_month_ts, 'timestamp')
                                        )
                                ).put('$job_root/ugc')

    onto_base = job.table('//home/dict/ontodb/ver/daily/production/all_cards_final')

    avg_labels = job.table('//home/dict/recommender/actions/latest'
                      ).project("action_type",
                                onto_id = ne.custom(lambda x: x, "to_id"),
                                assessment = ne.custom(lambda x: x, "value")
                               ).groupby('onto_id'
                                        ).aggregate(avg = na.mean('assessment')
                                                   ).put('$job_root/avg')

    # serp likes
    good = ugc.filter(sf.equals('assessment', 5)
                     ).groupby('key'
                              ).aggregate(reqs = na.count(),
                                          assessment = na.mean('assessment')
                                         ).join(onto_base, by = 'key'
                                               ).map(parse_onto_base
                                                    ).join(avg_labels, by = 'onto_id'
                                                          ).filter(sf.custom(lambda x, y, z, a: x > 4 and y != 'no_title' and z != 'no_description' and a != "no_thumb",
                                                                             "avg", 'title', 'description', 'thumbnail')
                                                                  )
    _serp_put_top_by_type(job, good, serp_dirs, 'serp_mixed', 'good_cold', serp_excluded)

    # serp dislikes
    bad = ugc.filter(sf.custom(lambda y: y in [0.5, 1], 'assessment')
                    ).groupby('key'
                             ).aggregate(reqs = na.count(),
                                         assessment = na.mean('assessment')
                                        ).join(onto_base, by = 'key'
                                              ).map(parse_onto_base
                                                   ).join(avg_labels, by = 'onto_id'
                                                         ).filter(sf.custom(lambda x, y, z, a: x < 1 and y != 'no_title' and z != 'no_description' and a != "no_thumb",
                                                                            "avg", 'title', 'description', 'thumbnail')
                                                                 )
    _serp_put_top_by_type(job, bad, serp_dirs, 'serp_mixed', 'bad_cold', serp_excluded)

    # serp vh likes
    # Every extractor reading `Resources` tolerates an empty value, so a video
    # without resources is dropped by the filter / join instead of crashing
    # the job.
    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full'
                     ).project('UUID',
                               title = ne.custom(lambda x: x, 'ComputedName'),
                               thumbnail = ne.custom(lambda x: x.get('thumbnail', 'no_thumbnail') if x else 'no_thumbnail', 'Resources'),
                               description = ne.custom(lambda x: x, 'Comment'),
                               embed_url = ne.custom(lambda x: 'https://' + x, 'GroupingUrl'),
                               ya_video_preview = ne.custom(lambda x: x.get('ya_video_preview', 'no_preview') if x else 'no_preview', 'Resources'),
                               onto_id = ne.custom(lambda x: x.get("onto_id", 'none') if x else 'none', 'Resources')
                              ).filter(sf.and_(sf.custom(lambda x: x != 'no_thumbnail', 'thumbnail'),
                                               sf.custom(lambda x: x != 'no_preview', 'ya_video_preview')
                                              )
                                      )

    good_vh = good.project("onto_id", 'reqs', 'ugc_type'
                          ).join(actual_urls, by = 'onto_id')
    _serp_put_top_by_type(job, good_vh, vh_dirs, 'entity_mixed', 'good_cold', vh_excluded)

    bad_vh = bad.project("onto_id", 'reqs', 'ugc_type'
                        ).join(actual_urls, by = 'onto_id')
    _serp_put_top_by_type(job, bad_vh, vh_dirs, 'entity_mixed', 'bad_cold', vh_excluded)

    job.run()


def efir_likes():
    """Build like/dislike based cold tables from the daily reaction squeezes.

    Reads the ``reaction_<YYYY-MM-DD>`` tables for ``days_count`` consecutive
    days ending two days before today (tables that are missing or empty are
    skipped), keeps "like"/"dislike" reactions on videos and scores each
    ``UUID`` with the mean of +1 (like) / -1 (dislike).

    * ``likes``: videos with a mean above 0.5, counted by number of likes;
    * ``dislikes``: videos with a mean below -0.5, counted by number of
      dislikes.

    Both are joined with ``actual_urls.full`` and, per carousel, the top 500
    by ``assessment_count`` are written to ``$job_root/<carousel>/good_cold``
    and ``.../bad_cold``.  The ``entity*`` / ``anim*`` carousels are written
    with ``append=True``; ``efir``, ``bloggers``, ``music`` and ``sport`` are
    written without it.
    """

    print('likes_dislikes')

    days_count = 182

    job = cluster.job()
    yt = cluster.driver.client

    now = datetime.datetime.now()
    to_concat = []
    for i in range(days_count):
        # One table per day: step back exactly one day per iteration so the
        # window is `days_count` consecutive days.
        day = now - datetime.timedelta(days=i + 2)
        table_name = '//home/videolog/sociality_squeeze/reaction_' + day.strftime("%Y-%m-%d")
        # Only register tables that exist and have rows.
        if yt.exists(table_name) and not yt.is_empty(table_name):
            print(table_name)
            to_concat.append(job.table(table_name))

    reactions = job.concat(*to_concat)

    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full'
                     ).project('UUID', 'IsMusic',
                               title = ne.custom(lambda x: x, 'ComputedName'),
                               thumbnail = ne.custom(lambda x: x.get('thumbnail', 'no_thumbnail') if x else 'no_thumbnail', 'Resources'),
                               description = ne.custom(lambda x: x, 'Comment'),
                               embed_url = ne.custom(lambda x: 'https://' + x, 'GroupingUrl'),
                               ya_video_preview = ne.custom(lambda x: x.get('ya_video_preview', 'no_preview') if x else 'no_preview', 'Resources'),
                               onto_otype = ne.custom(lambda x: x.get("onto_otype", 'none') if x else 'none', 'Resources'),
                               blogger_id = ne.custom(lambda x: x.get("blogger_id", 'none') if x else 'none', 'Resources'),
                               tags = ne.custom(lambda x: x.get("carusel_detailed_tags", 'none') if x else 'none', 'Resources')
                              )

    filtered = reactions.filter(sf.and_(sf.equals("action_type", "reaction on video"),
                                        sf.custom(lambda x: x in ["like", "dislike"], "reaction_type")
                                       )
                                ).project(UUID = ne.custom(lambda x: x, "uuid"),
                                          action = ne.custom(lambda x: 1 if x == "like" else -1, "reaction_type")
                                         )

    avg = filtered.groupby("UUID"
                          ).aggregate(avg_label = na.mean('action'))

    likes = filtered.filter(sf.equals('action', 1)
                           ).groupby("UUID"
                                    ).aggregate(assessment_count = na.count()
                                               ).join(avg, by = "UUID"
                                                     ).filter(sf.custom(lambda x: x > 0.5, 'avg_label')
                                                             ).join(actual_urls, by = 'UUID')

    dislikes = filtered.filter(sf.equals('action', -1)
                              ).groupby("UUID"
                                       ).aggregate(assessment_count = na.count()
                                                  ).join(avg, by = "UUID"
                                                        ).filter(sf.custom(lambda x: x < -0.5, 'avg_label')
                                                                ).join(actual_urls, by = 'UUID')

    # Helper columns that are only needed for routing rows to carousels.
    dropped = ('onto_otype', 'blogger_id', 'IsMusic', 'tags')

    # (carousel directory, append to existing table?, filter factory or None).
    # Factories are used so that every stream gets its own filter object.
    carousels = (
        ('entity', True, lambda: sf.equals('onto_otype', "Film/Film")),
        ('entity_series', True, lambda: sf.equals('onto_otype', "Film/Series@on")),
        ('anim_entity', True, lambda: sf.contains('tags', 'anim_film')),
        ('anim_series_entity', True, lambda: sf.contains('tags', 'anim_series')),
        ('entity_mixed', True,
         lambda: sf.custom(lambda x: x in ["Film/Film", "Film/Series@on"], 'onto_otype')),
        ('efir', False, None),
        ('bloggers', False, lambda: sf.not_(sf.equals('blogger_id', "none"))),
        ('music', False, lambda: sf.equals('IsMusic', True)),
        ('sport', False, lambda: sf.contains('tags', 'sport')),
    )

    for directory, append, make_filter in carousels:
        put_kwargs = {'append': True} if append else {}
        for stream, suffix in ((likes, 'good_cold'), (dislikes, 'bad_cold')):
            selected = stream.filter(make_filter()) if make_filter is not None else stream
            (selected.project(ne.all(exclude=dropped))
                .top(500, by='assessment_count')
                .put('$job_root/%s/%s' % (directory, suffix), **put_kwargs))

    job.run()


def unique_hps():
    """Deduplicate the entity carousel tables in place.

    For each entity carousel both the ``good_cold`` and the ``bad_cold`` table
    are read, reduced with ``unique`` over the display columns and written
    back to the same path.
    """

    print('unique_hps')

    dedup_columns = ('UUID', 'title', 'thumbnail', 'description', 'embed_url', 'ya_video_preview')
    carousels = ('entity_mixed', 'entity', 'entity_series', 'anim_entity', 'anim_series_entity')

    job = cluster.job()

    for carousel in carousels:
        for suffix in ('/good_cold', '/bad_cold'):
            path = '$job_root/' + carousel + suffix
            job.table(path).unique(*dedup_columns).put(path)

    job.run()


def main():
    """Validate the command line and run the three pipeline stages in order.

    ``--start_date``, ``--end_date`` and ``--carousel_type`` are all required;
    their values are parsed but not used by the stages.
    """

    cli = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date', '--carousel_type'):
        cli.add_argument(flag, type=str, required=True)
    # Parsing exits with a usage error if a required flag is missing.
    cli.parse_args()

    for stage in (serp_likes, efir_likes, unique_hps):
        stage()


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
