# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared handle to the Hahn YT cluster used by every job in this script.
# Operations run in the 'vika-pavlova' pool; the env settings below define the
# `job_root` path template, the pool trees passed as YT spec defaults, and a
# cap of 10 parallel operations.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/video_recommendations',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

def exists(path, yt):
    """Return the result of ``yt.exists(path)``.

    :param path: path to check.
    :param yt: client object exposing an ``exists(path)`` method.
    """
    path_is_present = yt.exists(path)
    return path_is_present


def _card_field(output_values, field, default):
    """Read ``field`` from the card stored in a task's ``outputValues``.

    The card is looked up at ``output_values['json_output']['card']``. When the
    card has an ``includes`` key the value is taken from the first included
    item, otherwise from the card itself. A present-but-empty ``includes``
    yields ``default`` instead of raising ``IndexError``.
    """
    card = output_values.get('json_output', {}).get("card", {})
    if 'includes' not in card:
        return card.get(field, default)
    includes = card.get('includes', [])
    if not includes:
        return default
    return includes[0].get(field, default)


def _card_extractor(field, default):
    """Build an extractor that pulls ``field`` of the card out of ``outputValues``."""
    return ne.custom(lambda x: _card_field(x, field, default), "outputValues")


def previous_results(carousel_type):
    """Store last week's negatively rated items for ``carousel_type``.

    Reads every table found under ``results/<carousel_type>``, keeps rows that
    have a content_id, a label other than 'None' / "-3" and a ``submitTs``
    later than local midnight today minus seven days, joins them with
    ``current_tolokers`` by ``workerId``, averages the label per worker and
    item, and writes the groups whose mean label is <= -1 to
    ``offline_recomms/previous_results/<carousel_type>``.

    Nothing is done when the results directory is missing or holds no tables.

    :param carousel_type: name of the sub-directory / output table.
    """
    results_root = '//home/videolog/vika-pavlova/video_recommendations/results/' + carousel_type
    yt = cluster.driver.client

    if not exists(results_root, yt):
        return

    results = list(yt.search(root=results_root, node_type='table'))
    if not results:
        # Same guard as previous_offline_results: concatenating zero tables
        # is not a meaningful job.
        return

    # Local midnight today minus 7 days.
    # NOTE(review): assumes submitTs is expressed in epoch seconds - confirm.
    week_seconds = 7 * 24 * 60 * 60
    week_ts = time.mktime(datetime.date.today().timetuple()) - week_seconds

    job = cluster.job()

    puids = job.table(
        '//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
    ).project(
        'workerId',
        puid=ne.custom(lambda x: str(x), 'puid'),
    )

    full = job.concat(
        *[job.table(path) for path in results]
    ).project(
        ne.all(),
        content_id=_card_extractor('content_id', None),
        onto_id=_card_extractor('onto_id', None),
        label=ne.custom(lambda x: x.get("label", "None"), "outputValues"),
    )

    filtered = full.filter(
        sf.custom(
            lambda x, y, z: x and y != 'None' and y != "-3" and z > week_ts,
            'content_id', 'label', "submitTs"
        ),
    ).project(
        'workerId', 'content_id', 'onto_id',
        label=ne.custom(lambda x: int(x), 'label'),
        title=_card_extractor('title', ""),
        thumbnail=_card_extractor('thumbnail', ""),
        description=_card_extractor('description', ""),
        embed_url=_card_extractor('embed_url', ""),
        ya_video_preview=_card_extractor('ya_video_preview', ""),
    ).join(puids, by='workerId')

    # One output row per worker and item, described by these columns.
    item_columns = (
        'workerId', 'puid', 'content_id', 'onto_id', 'title',
        'thumbnail', 'description', 'embed_url', 'ya_video_preview',
    )

    filtered.groupby(
        *item_columns
    ).aggregate(
        avg_label=na.mean('label'),
        assessment_count=na.count(),
    ).filter(
        sf.custom(lambda x: x <= -1, 'avg_label')
    ).unique(
        *item_columns
    ).project(
        *item_columns
    ).put(
        '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/previous_results/' + carousel_type
    )

    job.run()


def previous_offline_results(carousel_type):
    """Append last week's negatively rated offline items to the previous-results table.

    Steps, in order:

    1. For carousel types containing 'serp', rewrite
       ``offline_recomms/previous_results/<carousel_type>`` in place, keeping
       only rows whose ``title`` equals "NoneNoneNone".
    2. If ``offline_results/<carousel_type>`` exists and contains tables, take
       rows with a label other than 'None' / "-3" and ``submitTs`` later than
       local midnight today minus 604800 seconds, join with
       ``current_tolokers`` by ``workerId``, average the label per worker and
       item, and append groups with mean label <= -1 to the previous-results
       table.
    3. If the offline results directory exists, de-duplicate the
       previous-results table in place.

    :param carousel_type: name of the sub-directory / output table.
    """
    prev_table = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/previous_results/' + carousel_type
    offline_root = '//home/videolog/vika-pavlova/video_recommendations/offline_results/' + carousel_type

    if 'serp' in carousel_type:
        reset_job = cluster.job()
        kept_rows = reset_job.table(prev_table).filter(sf.equals("title", "NoneNoneNone"))
        kept_rows.put(prev_table)
        reset_job.run()

    yt = cluster.driver.client

    if not exists(offline_root, yt):
        return

    tables = list(yt.search(root=offline_root, node_type='table'))

    # Today's date (time part dropped) converted to a local-time timestamp,
    # shifted back by 604800 seconds.
    today_str = str(datetime.datetime.now()).split(' ')[0]
    today_midnight = datetime.datetime.strptime(today_str, "%Y-%m-%d")
    week_ts = time.mktime(today_midnight.timetuple()) - 604800

    # Columns used for de-duplication; note ya_video_preview is not among them.
    dedupe_columns = (
        'workerId', 'puid', 'content_id', 'onto_id',
        'title', 'thumbnail', 'description', 'embed_url',
    )

    def input_field(name, default=None):
        # Extractor reading `name` from the task's inputValues mapping.
        return ne.custom(lambda x: x.get(name, default), "inputValues")

    if tables:
        collect_job = cluster.job()

        worker_puids = collect_job.table(
            '//home/videolog/vika-pavlova/video_recommendations/current_tolokers'
        ).project(
            'workerId',
            puid=ne.custom(lambda x: str(x), 'puid'),
        )

        labelled = collect_job.concat(
            *[collect_job.table(table_path) for table_path in tables]
        ).project(
            ne.all(),
            content_id=input_field("content_id"),
            onto_id=input_field("onto_id"),
            label=ne.custom(lambda x: x.get("label", "None"), "outputValues"),
        )

        recent = labelled.filter(
            sf.custom(
                lambda y, z: y != 'None' and y != "-3" and z > week_ts,
                'label', "submitTs"
            ),
        ).project(
            'workerId', 'content_id', 'onto_id',
            label=ne.custom(lambda x: int(x), 'label'),
            title=input_field('title', ""),
            thumbnail=input_field('thumbnail', ""),
            description=input_field('description', ""),
            embed_url=input_field('embed_url', ""),
            ya_video_preview=input_field('ya_video_preview', ""),
        ).join(worker_puids, by='workerId')

        averaged = recent.groupby(
            'workerId', 'content_id', 'puid', 'title', 'thumbnail',
            'description', 'embed_url', 'ya_video_preview', 'onto_id'
        ).aggregate(
            avg_label=na.mean('label'),
            assessment_count=na.count(),
        )

        disliked = averaged.filter(sf.custom(lambda x: x <= -1, 'avg_label'))

        disliked.unique(
            *dedupe_columns
        ).project(
            *(dedupe_columns + ('ya_video_preview',))
        ).put(prev_table, append=True)

        collect_job.run()

    dedupe_job = cluster.job()
    dedupe_job.table(prev_table).unique(*dedupe_columns).put(prev_table)
    dedupe_job.run()


def merge_prev_results(carousel_type, efir_prev_results):
    """Merge two previous-results tables and de-duplicate the outcome.

    Both ``previous_results/<carousel_type>`` and
    ``previous_results/<efir_prev_results>`` are projected to a common column
    set and concatenated. The concatenation is written to
    ``previous_results/<carousel_type>`` and to
    ``previous_results/merged_<carousel_type>``; a second job then rewrites
    ``previous_results/<carousel_type>`` with duplicate rows removed.

    :param carousel_type: table that is merged into and overwritten.
    :param efir_prev_results: name of the second table to merge in.
    """
    print("merge previous results")

    prev_dir = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/previous_results/'
    target_table = prev_dir + carousel_type
    merged_copy = prev_dir + 'merged_' + carousel_type
    columns = ('workerId', 'puid', 'onto_id', 'title', 'thumbnail', 'description', 'embed_url')

    merge_job = cluster.job()

    serp_results = merge_job.table(target_table).project(*columns)
    efir_results = merge_job.table(prev_dir + efir_prev_results).project(*columns)

    for destination in (target_table, merged_copy):
        merge_job.concat(serp_results, efir_results).put(destination)

    merge_job.run()

    dedupe_job = cluster.job()
    dedupe_job.table(target_table).unique(*columns).put(target_table)
    dedupe_job.run()


def main():
    """Parse the command line and refresh previous results for one carousel type.

    ``--date`` and ``--carousel_type`` are both required; only
    ``--carousel_type`` is used below. Carousel types without 'serp' in their
    name are first processed by ``previous_results``; every type goes through
    ``previous_offline_results``; the four known serp types are finally merged
    with their counterpart table via ``merge_prev_results``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--date', type=str, required=True)
    parser.add_argument('--carousel_type', type=str, required=True)
    args = parser.parse_args()

    carousel_type = args.carousel_type
    print(carousel_type)

    if 'serp' not in carousel_type:
        previous_results(carousel_type)

    previous_offline_results(carousel_type)

    # serp carousel type -> table whose previous results are merged into it
    merge_sources = {
        'serp_film': 'entity',
        'serp_series': 'entity_series',
        'serp_anim_film': 'anim_entity',
        'serp_anim_series': 'anim_series_entity',
    }
    if carousel_type in merge_sources:
        merge_prev_results(carousel_type, merge_sources[carousel_type])


if __name__ == '__main__':
    main()
