# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared Nile cluster handle used by every job in this script: Hahn YT cluster,
# personal pool, with a `job_root` path template and YT spec defaults attached.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/ugc_assessments',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10
)


def _first_ru_value(items, placeholder):
    """Return the first 'value' of an item tagged with the 'ru' locale.

    An item counts only if it has a 'RelevLocale' entry containing 'ru' and
    its 'value' differs from `placeholder`; `placeholder` is returned when no
    item qualifies.
    """
    for entry in items:
        if 'RelevLocale' in entry and 'ru' in entry['RelevLocale']:
            candidate = entry.get('value', placeholder)
            if candidate != placeholder:
                return candidate
    return placeholder


def parse_onto_base(recs):
    """Mapper: flatten a joined (action, toloker, ontology card) record.

    For every input record the JSON string in `rec["value"]` is decoded and
    the card's thumbnail, Russian title, Russian description, embed url and
    ugc type are extracted.  A missing piece is replaced by its sentinel
    ('no_thumb', 'no_title', 'no_description', 'no_url', 'no_type').

    `avg_label` is derived from the assessment only for records whose
    `type` equals 2: assessments 4 and 5 give 2, assessment 0 gives -2,
    everything else (and every other type) gives 0.

    Yields one `Record` per input record.
    """
    for rec in recs:
        card = json.loads(rec["value"])

        # thumbnail: first image url that is not the sentinel itself
        thumbnail = 'no_thumb'
        for image in card.get('Image', []):
            thumbnail = image.get('url', 'no_thumb')
            if thumbnail != 'no_thumb':
                break

        # title / description: first usable value marked with the 'ru' locale
        title = _first_ru_value(card.get('Title', []), 'no_title')
        description = _first_ru_value(
            card.get('isa', {}).get('Defin', []), 'no_description'
        )

        # embed_url: defaults to the first id; any id mentioning 'kinopoisk'
        # overrides it (the last such id wins because the scan never stops)
        embed_url = 'no_url'
        id_entries = card.get('ids', [])
        if id_entries:
            embed_url = id_entries[0].get('value', 'no_url')
            for entry in id_entries:
                candidate = entry.get('value', 'no_url')
                if 'kinopoisk' in candidate:
                    embed_url = candidate

        # type: first listed ugc_type, if any
        listed_types = card.get('isa', {}).get('ugc_type', [])
        ugc_type = listed_types[0] if listed_types else 'no_type'

        # assessment -> label; only records with type == 2 are labelled
        avg_label = 0
        if rec['type'] == 2:
            score = rec['assessment']
            if score == 0:
                avg_label = -2
            elif score in (4, 5):
                avg_label = 2

        yield Record(
            puid=rec.puid,
            workerId=rec.workerId,
            thumbnail=thumbnail,
            title=title,
            description=description,
            embed_url=embed_url,
            ugc_type=ugc_type,
            avg_label=avg_label,
            assessment=rec.assessment,
            assessment_type=rec["type"],
            onto_id=rec['key']
        )


def serp_likes():
    """Build and run the job that exports labelled toloker assessments.

    Reads the recommender actions table, joins it with the current tolokers
    table by `puid` and with the ontology cards table by `key`, maps the
    result through `parse_onto_base`, and keeps only rows where every
    extracted field is present and `avg_label` is non-zero.  The surviving
    rows are written to the `mixed` table and then split by `ugc_type` into
    the `film`, `series`, `anim_series` and `anim` tables.
    """
    job = cluster.job()

    # user actions: strip the 'p/' prefix from the id and rename columns
    actions = job.table('//home/dict/recommender/actions/latest')
    actions = actions.project(
        "type",
        puid=ne.custom(lambda x: x.replace('p/', ''), 'id'),
        key=ne.custom(lambda x: x, "to_id"),
        assessment=ne.custom(lambda x: x, "value")
    )

    # tolokers: puid is cast to str so it can be matched against action ids
    workers = job.table('//home/videolog/vika-pavlova/video_recommendations/current_tolokers')
    workers = workers.project(
        'workerId',
        puid=ne.custom(lambda x: str(x), 'puid')
    )

    cards = job.table('//home/dict/ontodb/ver/daily/production/all_cards_final')

    joined = actions.join(workers, by='puid').join(cards, by='key')

    # drop every row in which parse_onto_base had to fall back to a sentinel,
    # as well as rows that received no label
    conditions = [
        sf.custom(lambda x: x != 'no_thumb', 'thumbnail'),
        sf.custom(lambda x: x != 'no_title', 'title'),
        sf.custom(lambda x: x != 'no_description', 'description'),
        sf.custom(lambda x: x != 'no_url', 'embed_url'),
        sf.custom(lambda x: x != 'no_type', 'ugc_type'),
        sf.custom(lambda x: x != 0, 'avg_label'),
    ]
    mixed = joined.map(parse_onto_base).filter(sf.and_(*conditions)).put(
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/mixed'
    )

    # one output table per content type, in the same order as before
    per_type_outputs = (
        ('film',
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/film'),
        ('series',
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/series'),
        ('anim-series',
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/anim_series'),
        ('anim',
         '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/serp_likes/anim'),
    )
    for wanted_type, destination in per_type_outputs:
        mixed.filter(sf.equals('ugc_type', wanted_type)).put(destination)

    job.run()


def main():
    """Command-line entry point: validate the arguments and run `serp_likes`.

    `--start_date`, `--end_date` and `--carousel_type` are all required, so
    the script exits with a usage error when any of them is missing.
    NOTE(review): the parsed values are not passed on to `serp_likes` —
    confirm whether they are still needed.
    """
    cli = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date', '--carousel_type'):
        cli.add_argument(flag, type=str, required=True)

    # parsing is kept for its validation side effect only
    cli.parse_args()

    serp_likes()


# Run the export only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
