# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared YT (Hahn) cluster handle used by every job in this file.
# The `job_root` template is what the `$job_root/...` table paths below expand from.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)


def parse_onto_base(recs):
    """Flatten joined ontology-card records into one display row each.

    Every input record is read for ``value`` (a JSON-encoded card), ``key``,
    ``puid``, ``workerId`` and ``assessment``.  From the decoded card the
    mapper extracts a thumbnail, a title, a description, an embed URL and a
    ugc type; a ``no_*`` placeholder is emitted for any piece that could not
    be found.

    Yields:
        Record with fields ``puid``, ``workerId``, ``thumbnail``, ``title``,
        ``description``, ``embed_url``, ``ugc_type``, ``avg_label`` (always
        -2), ``assessment`` and ``onto_id`` (the input ``key``).
    """
    for rec in recs:
        card = json.loads(rec["value"])

        # thumbnail: the first image entry whose 'url' is not the placeholder
        thumbnail = 'no_thumb'
        for image in card.get('Image', []):
            url = image.get('url', 'no_thumb')
            if url != 'no_thumb':
                thumbnail = url
                break

        # title: the first entry with 'ru' in its RelevLocale that carries a value
        title = 'no_title'
        for entry in card.get('Title', []):
            if 'RelevLocale' not in entry or 'ru' not in entry['RelevLocale']:
                continue
            title = entry.get('value', 'no_title')
            if title != 'no_title':
                break

        # description: same selection rule as the title, over isa.Defin
        isa = card.get('isa', {})
        description = 'no_description'
        for entry in isa.get('Defin', []):
            if 'RelevLocale' not in entry or 'ru' not in entry['RelevLocale']:
                continue
            description = entry.get('value', 'no_description')
            if description != 'no_description':
                break

        # embed_url: start from the first id; any id containing 'kinopoisk'
        # overrides it (the last such id wins because the scan never stops)
        embed_url = 'no_url'
        ids = card.get('ids', [])
        if ids:
            embed_url = ids[0].get('value', 'no_url')
            for entry in ids:
                candidate = entry.get('value', 'no_url')
                if 'kinopoisk' in candidate:
                    embed_url = candidate

        # type: the first listed ugc_type, if any
        ugc_types = isa.get('ugc_type', [])
        ugc_type = ugc_types[0] if ugc_types else 'no_type'

        yield Record(
            puid=rec.puid,
            workerId=rec.workerId,
            thumbnail=thumbnail,
            title=title,
            description=description,
            embed_url=embed_url,
            ugc_type=ugc_type,
            avg_label=-2,
            assessment=rec.assessment,
            onto_id=rec['key'],
        )

def serp_dislikes():
    """Build the ``serp_*`` and entity tables under ``$job_root``.

    Pipeline, all inside a single cluster job:

    1. Take recommender actions with ``action_type`` in {1, 2} and ``value``
       in {0, 0.5, 1, 1.5, 2} whose ``timestamp`` is newer than 30 days before
       today's local midnight.
    2. Join them with the current tolokers (by ``puid``) and with the
       ontology cards (by ``key``), flatten the cards with
       ``parse_onto_base`` and drop rows where any display field is a
       placeholder.
    3. Write the result to ``serp_mixed`` and to one table per ugc type.
    4. Join the same rows with ``actual_urls`` by ``onto_id``, keep only the
       rows whose title is the minimum title of their
       (onto_id, puid, ugc_type, workerId) group, and write them to
       ``entity_mixed`` and to one entity table per ugc type.

    Takes no arguments and returns nothing; the effect is the written tables.
    """
    print('serp')

    job = cluster.job()

    # Unix timestamp of today's local midnight minus a 30-day window.
    window_seconds = 30 * 24 * 60 * 60
    month_ts = time.mktime(datetime.date.today().timetuple()) - window_seconds

    print("month_ts is %s" % (str(month_ts)))

    ugc = job.table('//home/dict/recommender/actions/latest'
                   ).filter(sf.and_(sf.custom(lambda x, y: x in [1, 2] and y in [0, 0.5, 1, 1.5, 2],
                                              'action_type', 'value'),
                                    sf.custom(lambda x: x > month_ts, 'timestamp')
                                   )
                           ).project(puid = ne.custom(lambda x: x.replace('p/', ''), 'id'),
                                     key = ne.custom(lambda x: x, "to_id"),
                                     assessment = ne.const(-2)
                                    ).unique('puid', 'key', 'assessment')

    tolokers = job.table('//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
                        ).project('workerId',
                                  puid = ne.custom(lambda x: str(x), 'puid')
                                 )

    onto_base = job.table('//home/dict/ontodb/ver/daily/production/all_cards_final')

    # Every extractor reading `Resources` tolerates an empty value; `onto_id`
    # previously did not and would raise on such rows before the filter below
    # had a chance to drop them.
    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full'
                     ).project('UUID',
                               title = ne.custom(lambda x: x, 'ComputedName'),
                               thumbnail = ne.custom(lambda x: x.get('thumbnail', 'no_thumbnail') if x else 'no_thumbnail', 'Resources'),
                               description = ne.custom(lambda x: x, 'Comment'),
                               embed_url = ne.custom(lambda x: 'https://' + x, 'GroupingUrl'),
                               ya_video_preview = ne.custom(lambda x: x.get('ya_video_preview', 'no_preview') if x else 'no_preview', 'Resources'),
                               onto_id = ne.custom(lambda x: x.get("onto_id", 'none') if x else 'none', 'Resources')
                              ).filter(sf.and_(sf.custom(lambda x: x != 'no_thumbnail', 'thumbnail'),
                                               sf.custom(lambda x: x != 'no_preview', 'ya_video_preview')
                                              )
                                      )

    raw = ugc.join(tolokers, by = 'puid'
                  ).join(onto_base, by = 'key'
                        )

    # SERP tables: only cards for which every display field was found.
    final = raw.map(parse_onto_base
                   ).filter(sf.and_(sf.custom(lambda x: x != 'no_thumb', 'thumbnail'),
                                    sf.custom(lambda x: x != 'no_title', 'title'),
                                    sf.custom(lambda x: x != 'no_description', 'description'),
                                    sf.custom(lambda x: x != 'no_url', 'embed_url'),
                                    sf.custom(lambda x: x != 'no_type', 'ugc_type')
                                    )
                           ).project('puid', 'workerId', 'thumbnail', 'title',
                                     'description', 'embed_url', 'ugc_type', 'onto_id'
                                    )

    final.project(ne.all(exclude = 'ugc_type')
                 ).put('$job_root/serp_mixed')

    # (ugc_type value, output table name) for the per-type SERP tables.
    serp_tables = (
        ('film', 'serp_film'),
        ('series', 'serp_series'),
        ('anim-series', 'serp_anim_series'),
        ('anim', 'serp_anim_film'),
    )
    for ugc_type, table in serp_tables:
        final.filter(sf.equals('ugc_type', ugc_type)
                    ).project(ne.all(exclude = 'ugc_type')
                             ).put('$job_root/' + table)

    # Entity tables: the same rows matched against actual_urls by onto_id.
    t = final.project('onto_id', "puid", 'ugc_type', "workerId"
                     ).join(actual_urls, by = 'onto_id'
                            )

    # Minimum title per group; joining back on it keeps only the rows that
    # carry that title.
    t1 = t.groupby('onto_id', "puid", 'ugc_type', "workerId"
                  ).aggregate(title = na.min('title')
                             )

    vh = t.join(t1, by = ('onto_id', "puid", 'ugc_type', "workerId", 'title'
                         )
               ).project('puid', 'workerId', 'thumbnail', 'title', 'ya_video_preview',
                         'description', 'embed_url', 'ugc_type', 'onto_id', "UUID")

    vh.project(ne.all(exclude = 'ugc_type')
              ).put('$job_root/entity_mixed')

    # (ugc_type value, output table name) for the per-type entity tables.
    entity_tables = (
        ('film', 'entity'),
        ('series', 'entity_series'),
        ('anim-series', 'anim_series_entity'),
        ('anim', 'anim_entity'),
    )
    for ugc_type, table in entity_tables:
        vh.filter(sf.equals('ugc_type', ugc_type)
                 ).project(ne.all(exclude = 'ugc_type')
                          ).put('$job_root/' + table)

    job.run()


def efir_dislikes(start, end):
    """Build the dislike tables from video reactions between two dates.

    Reads the ``reaction_{start..end}`` tables, keeps rows whose
    ``action_type`` is "reaction on video" and whose ``reaction_type`` is
    "dislike", counts them per (puid, UUID), and joins the result with the
    current tolokers (by ``puid``) and with ``actual_urls`` (by ``UUID``).

    The joined rows are written to ``efir``, ``bloggers``, ``music`` and
    ``sport``, and appended to the five entity tables.

    Args:
        start: first date of the range, as a string spliced into the table path.
        end: last date of the range, as a string spliced into the table path.
    """
    print('efir_dislikes')

    job = cluster.job()

    reactions = job.table('//home/videolog/sociality_squeeze/reaction_{' + start + '..' + end +'}')

    puids = job.table('//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
                     ).project('workerId', puid = ne.custom(lambda x: str(x), 'puid'))

    # Every extractor reading `Resources` tolerates an empty value; `onto_id`
    # previously did not and would raise on such rows.
    actual_urls = job.table('//home/video-hosting/ya-video/actual_urls.full'
                     ).project('UUID', 'IsMusic',
                               title = ne.custom(lambda x: x, 'ComputedName'),
                               thumbnail = ne.custom(lambda x: x.get('thumbnail', 'no_thumbnail') if x else 'no_thumbnail', 'Resources'),
                               description = ne.custom(lambda x: x, 'Comment'),
                               embed_url = ne.custom(lambda x: 'https://' + x, 'GroupingUrl'),
                               ya_video_preview = ne.custom(lambda x: x.get('ya_video_preview', 'no_preview') if x else 'no_preview', 'Resources'),
                               onto_otype = ne.custom(lambda x: x.get("onto_otype", 'none') if x else 'none', 'Resources'),
                               onto_id = ne.custom(lambda x: x.get("onto_id", 'none') if x else 'none', 'Resources'),
                               blogger_id = ne.custom(lambda x: x.get("blogger_id", 'none') if x else 'none', 'Resources'),
                               tags = ne.custom(lambda x: x.get("carusel_detailed_tags", 'none') if x else 'none', 'Resources')
                              )

    filtered = reactions.filter(sf.and_(sf.equals("action_type", "reaction on video"),
                                        sf.custom(lambda x: x == "dislike", "reaction_type")
                                       )
                                ).project('puid',
                                          UUID = ne.custom(lambda x: x, "uuid")
                                         )

    aggr = filtered.groupby('puid', "UUID"
                           ).aggregate(assessment_count = na.count()
                                      ).project(ne.all(),
                                                avg_label = ne.const(-2),
                                               )

    final = aggr.join(puids, by = 'puid'
                     ).join(actual_urls, by = 'UUID'
                           )

    final.project('UUID', 'title', 'thumbnail', 'description', 'embed_url', 'ya_video_preview',
                  'puid', 'workerId'
                 ).put('$job_root/efir')

    # Columns shared by every filtered output; entity tables also carry onto_id.
    plain_columns = ('puid', 'workerId', 'UUID', 'title', 'thumbnail', 'description',
                     'embed_url', 'ya_video_preview')
    entity_columns = plain_columns + ('onto_id',)

    # (row filter, table name): appended to tables that may already exist.
    entity_outputs = (
        (sf.equals('onto_otype', "Film/Film"), 'entity'),
        (sf.equals('onto_otype', "Film/Series@on"), 'entity_series'),
        (sf.contains('tags', 'anim_film'), 'anim_entity'),
        (sf.contains('tags', 'anim_series'), 'anim_series_entity'),
        (sf.custom(lambda x: x in ["Film/Film", "Film/Series@on"], 'onto_otype'), 'entity_mixed'),
    )
    for row_filter, table in entity_outputs:
        final.filter(row_filter
                    ).project(*entity_columns
                             ).put('$job_root/' + table, append = True)

    # (row filter, table name): written without append.
    plain_outputs = (
        (sf.not_(sf.equals('blogger_id', "none")), 'bloggers'),
        (sf.equals('IsMusic', True), 'music'),
        (sf.contains('tags', 'sport'), 'sport'),
    )
    for row_filter, table in plain_outputs:
        final.filter(row_filter
                    ).project(*plain_columns
                             ).put('$job_root/' + table)

    job.run()


def unique_hps():
    """Deduplicate the entity and per-type SERP tables in place.

    Each listed table under ``$job_root`` is read, reduced to unique rows over
    its key columns and written back to the same path, all within one job.
    """
    print('unique_hps')

    job = cluster.job()

    entity_key = ('puid', 'workerId', 'UUID', 'title', 'thumbnail', 'description',
                  'embed_url', 'ya_video_preview', 'onto_id')
    serp_key = ('puid', 'workerId', 'title', 'thumbnail', 'description',
                'embed_url', 'onto_id')

    # (table names, columns that define uniqueness for those tables)
    # NOTE(review): 'serp_mixed' is not in the list - confirm this is intended.
    dedup_plan = (
        (('entity_mixed', 'entity', 'entity_series', 'anim_entity', 'anim_series_entity'),
         entity_key),
        (('serp_film', 'serp_series', 'serp_anim_film', 'serp_anim_series'),
         serp_key),
    )

    for carousels, key_columns in dedup_plan:
        for carousel in carousels:
            path = '$job_root/' + carousel
            job.table(path).unique(*key_columns).put(path)

    job.run()


def main():
    """Parse the command line and run the three pipeline stages in order.

    Requires ``--start_date``, ``--end_date`` and ``--carousel_type``; only
    the two dates are read here.  The five entity tables are removed first,
    then the SERP stage, the reactions stage and the deduplication stage run.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date', '--carousel_type'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    # Tables that efir_dislikes() appends to; removed before the run.
    stale_tables = (
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes/entity_mixed',
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes/entity',
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes/entity_series',
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes/anim_entity',
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/dislikes/anim_series_entity',
    )
    for table_path in stale_tables:
        cluster.driver.remove(table_path)

    serp_dislikes()
    efir_dislikes(options.start_date, options.end_date)
    unique_hps()


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
