# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os
import copy
import random


# Shared YT cluster handle used by every job and table read in this script.
# It runs in the 'vika-pavlova' pool, schedules operations on the "physical"
# pool tree with "cloud" as a tentative tree, and allows up to 10 operations
# to run in parallel.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        'job_root': 'home/videolog/vika-pavlova/2406-learn_vs_pool',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

class gather_data():
    """Reducer that builds the list of Toloka tasks for each (puid, workerId) group.

    Rows inside a group are told apart by ``content_type``:

    * 0 - recommendations: the first 10 components of every row with a
      non-empty ``components`` list become plain tasks (sets ``recoms``);
    * 1 - cold-start recommendations: same task layout as type 0, but the
      row is processed without the emptiness check (sets ``cold_recoms``);
    * 2 - likes / dislikes, used as honeypots (tasks with known solutions);
    * 3 - previous results, used as honeypots as well.

    At most one "good" and one "bad" honeypot is taken per group, and an item
    id is never added to the task list twice.

    The carousel type selects the field layout: when it contains ``'serp'``
    items are identified by ``onto_id``; otherwise (efir) they are identified
    by ``content_id`` and every task additionally carries ``ya_video_preview``.

    Each group yields one ``Record`` with the fields ``puid``, ``workerId``,
    ``toloka_data``, ``recs_set`` and the 0/1 flags ``good_hp``, ``bad_hp``,
    ``recoms``, ``cold_recoms``, ``is_like``, ``is_dislike``, ``is_good_res``
    and ``is_bad_res``.
    """

    def __init__(self, carousel_type):
        self.carousel_type = carousel_type

    def _format_label(self, value):
        """Render an assessment as a Toloka label string.

        The value is clamped to the [-2, 2] scale and the top grade is written
        as "+2"; all other grades are plain integers ("1", "0", "-1", "-2").
        """
        value = max(-2, min(2, int(value)))
        if value == 2:
            return "+2"
        return str(value)

    def _known_solutions(self, avg_label):
        """Return the three accepted answers around the rounded ``avg_label``.

        ``round()`` returns a float on Python 2, so the result is converted to
        int before formatting; otherwise labels come out as "2.0" / "-1.0".
        """
        assessment = int(round(avg_label))
        return [
            {
                "weight": 1,
                "outputValues": {
                    "label": self._format_label(assessment + shift)
                }
            }
            for shift in (1, 0, -1)
        ]

    def _component_task(self, recom, is_serp):
        """Build a plain (no known solutions) task from one carousel component."""
        values = {
            "embed_url": recom["componentUrl"]["pageUrl"],
            "title": recom["text.title"],
            "thumbnail": recom['thumbadd']["urls"][0],
            "description": recom["text.snippet"],
        }
        if is_serp:
            values["onto_id"] = recom["onto_id"]
        else:
            values["ya_video_preview"] = recom['thumbadd']["videoPlayerHtml"]
            values["content_id"] = recom["text.cardId"]
        return {"inputValues": values}

    def _honeypot_task(self, rec, item_id, is_serp):
        """Build a honeypot task (with known solutions) from a like/result row."""
        values = {
            "title": rec["title"],
            "thumbnail": rec["thumbnail"],
            "description": rec["description"],
            "embed_url": rec["embed_url"],
        }
        if is_serp:
            values["onto_id"] = item_id
        else:
            values["ya_video_preview"] = rec["ya_video_preview"]
            values["content_id"] = item_id
        return {
            "inputValues": values,
            "knownSolutions": self._known_solutions(rec['avg_label']),
        }

    def _honeypot_is_good(self, avg_label, extremes_only):
        """Classify a honeypot candidate.

        Returns True for a "good" honeypot, False for a "bad" one and None when
        the row cannot be used. With ``extremes_only`` only the exact labels
        2 and -2 qualify; otherwise the sign of the label decides.
        """
        if extremes_only:
            if avg_label == 2:
                return True
            if avg_label == -2:
                return False
            return None
        if avg_label >= 0:
            return True
        if avg_label < 0:
            return False
        return None

    def __call__(self, groups):
        """Yield one summary ``Record`` per (key, rows) pair from ``groups``."""
        is_serp = 'serp' in self.carousel_type

        # Field that identifies a carousel component.
        component_id_field = "onto_id" if is_serp else "text.cardId"
        # Field that identifies a honeypot row, per content_type.
        if is_serp:
            honeypot_id_field = {2: "onto_id", 3: "onto_id"}
        else:
            honeypot_id_field = {2: 'UUID', 3: "content_id"}

        for key, recs in groups:
            good_hp = 0
            bad_hp = 0
            recoms = 0
            cold_recoms = 0
            is_like = 0
            is_dislike = 0
            is_good_res = 0
            is_bad_res = 0
            toloka_data = []
            recs_set = set()

            for rec in recs:
                content_type = rec['content_type']

                # recommendations (0) and cold-start recommendations (1)
                if content_type == 0 or content_type == 1:
                    components = rec['components']
                    # only regular recommendations are skipped when empty
                    if content_type == 0 and not components:
                        continue
                    for recom in components[:10]:
                        item_id = recom[component_id_field]
                        if item_id not in recs_set:
                            recs_set.add(item_id)
                            toloka_data.append(
                                self._component_task(recom, is_serp))
                    if content_type == 0:
                        recoms = 1
                    else:
                        cold_recoms = 1

                # likes/dislikes (2) and previous results (3) as honeypots
                elif content_type == 2 or content_type == 3:
                    if good_hp != 0 and bad_hp != 0:
                        # both honeypot slots are already filled
                        continue

                    # serp likes qualify only with an exact +2 / -2 label
                    is_good = self._honeypot_is_good(
                        rec['avg_label'],
                        extremes_only=(is_serp and content_type == 2))
                    if is_good is None:
                        continue
                    if (good_hp if is_good else bad_hp) != 0:
                        # the slot this row would fill is taken
                        continue

                    item_id = rec[honeypot_id_field[content_type]]
                    if item_id in recs_set:
                        continue
                    recs_set.add(item_id)
                    toloka_data.append(
                        self._honeypot_task(rec, item_id, is_serp))

                    if is_good:
                        good_hp = 1
                        if content_type == 2:
                            is_like = 1
                        else:
                            is_good_res = 1
                    else:
                        bad_hp = 1
                        if content_type == 2:
                            is_dislike = 1
                        else:
                            is_bad_res = 1

            yield Record(puid=key['puid'], workerId=key['workerId'],
                         good_hp=good_hp, bad_hp=bad_hp,
                         recoms=recoms, toloka_data=toloka_data,
                         is_like=is_like, is_dislike=is_dislike,
                         is_good_res=is_good_res, is_bad_res=is_bad_res,
                         cold_recoms=cold_recoms, recs_set=list(recs_set))


class cold_hp_adding():
    """Mapper that tops up records lacking a honeypot with a random cold one.

    For every incoming record a randomly chosen "bad" cold honeypot is
    appended when ``bad_hp`` is 0, and a randomly chosen "good" one when
    ``good_hp`` is 0 (each only if the corresponding pool is non-empty).
    The emitted record repeats the input fields, carries the extended
    ``toloka_data`` and adds the 0/1 flags ``is_bad_cold`` / ``is_good_cold``.

    Args:
        good_cold_hp: list of good honeypot task dicts.
        bad_cold_hp: list of bad honeypot task dicts.
        carousel_type: carousel name; when it contains ``'serp'`` the
            ``ya_video_preview`` input field is left out of the honeypot.
    """

    def __init__(self, good_cold_hp, bad_cold_hp, carousel_type):
        self.good_cold_hp = good_cold_hp
        self.bad_cold_hp = bad_cold_hp
        self.carousel_type = carousel_type

    def _pick_honeypot(self, pool):
        """Return a random honeypot from ``pool`` adapted to the carousel type.

        The pool entries are shared between all processed records, so the
        ``ya_video_preview`` field is dropped on a copy instead of being
        deleted from the pooled dict in place.
        """
        honeypot = random.choice(pool)
        values = honeypot["inputValues"]
        if 'serp' in self.carousel_type and 'ya_video_preview' in values:
            trimmed = dict(values)
            del trimmed['ya_video_preview']
            honeypot = dict(honeypot, inputValues=trimmed)
        return honeypot

    def __call__(self, recs):
        """Yield one ``Record`` per input record with missing honeypots added."""
        for rec in recs:
            is_bad_cold = 0
            is_good_cold = 0
            bad_hp = rec['bad_hp']
            good_hp = rec['good_hp']
            # work on a copy so the incoming task list is left untouched
            new_data = copy.copy(rec["toloka_data"])

            if rec['bad_hp'] == 0 and self.bad_cold_hp:
                new_data.append(self._pick_honeypot(self.bad_cold_hp))
                is_bad_cold = 1
                bad_hp = 1

            if rec['good_hp'] == 0 and self.good_cold_hp:
                new_data.append(self._pick_honeypot(self.good_cold_hp))
                is_good_cold = 1
                good_hp = 1

            yield Record(puid=rec['puid'], workerId=rec['workerId'],
                         good_hp=good_hp, bad_hp=bad_hp,
                         recoms=rec['recoms'], toloka_data=new_data,
                         is_like=rec['is_like'], is_dislike=rec['is_dislike'],
                         is_good_res=rec['is_good_res'],
                         is_bad_res=rec['is_bad_res'],
                         is_bad_cold=is_bad_cold, is_good_cold=is_good_cold,
                         cold_recoms=rec['cold_recoms'],
                         recs_set=rec['recs_set'])


def process_data_for_toloka(carousel_type, good_cold_hp, bad_cold_hp, date):
    """Build and run the job that writes the Toloka input table for ``date``.

    Four source tables are tagged with a ``content_type`` (recoms - 0,
    cold recoms - 1, likes - 2, previous results - 3), concatenated, grouped
    by (puid, workerId), sorted by ``content_type`` and reduced with
    ``gather_data``. Groups missing a good or a bad honeypot are passed
    through ``cold_hp_adding``; the rest are kept as is. Only rows with
    ``recoms == 1`` are written to the output table.

    Args:
        carousel_type: carousel name, used as a path component and passed on
            to the reducer and the mapper.
        good_cold_hp: list of good cold honeypots for ``cold_hp_adding``.
        bad_cold_hp: list of bad cold honeypots for ``cold_hp_adding``.
        date: date string used as the last path component of dated tables.
    """
    print('process_data_for_toloka')

    job = cluster.job()

    # --- inputs, each tagged with its content_type --------------------------
    likes_path = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/all_likes/' + carousel_type
    likes = job.table(likes_path).project(
        ne.all(),
        content_type=ne.const(2),
    )

    results_path = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/previous_results/' + carousel_type
    results = job.table(results_path).project(
        ne.all(exclude='puid'),
        content_type=ne.const(3),
        # puid is re-emitted as a string
        puid=ne.custom(lambda x: str(x), 'puid'),
    )

    recoms_path = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/recoms/' + carousel_type + '/' + date
    recoms = job.table(recoms_path).project(
        ne.all(),
        content_type=ne.const(0),
    )

    cold_recoms_path = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/cold_recoms/' + carousel_type + '/' + date
    cold_recoms = job.table(cold_recoms_path).project(
        ne.all(exclude='puid'),
        content_type=ne.const(1),
        # puid is re-emitted as a string
        puid=ne.custom(lambda x: str(x), 'puid'),
    )

    total = job.concat(likes, results, recoms, cold_recoms)

    # --- one record per (puid, workerId) ------------------------------------
    grouped = total.groupby('puid', 'workerId').sort('content_type')
    pre_final = grouped.reduce(gather_data(carousel_type))

    # groups lacking at least one honeypot get a cold one added
    missing_honeypot = sf.custom(lambda x, y: x == 0 or y == 0, 'bad_hp', 'good_hp')
    lost = pre_final.filter(missing_honeypot).map(
        cold_hp_adding(good_cold_hp, bad_cold_hp, carousel_type)
    )

    # groups that already have both honeypots pass through unchanged
    has_both_honeypots = sf.custom(lambda x, y: x == 1 and y == 1, 'bad_hp', 'good_hp')
    tmp = pre_final.filter(has_both_honeypots)

    # --- output: only groups that contain regular recommendations -----------
    output_path = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/data_for_toloka/' + carousel_type + '/' + date
    job.concat(lost, tmp).filter(sf.equals('recoms', 1)).put(output_path)

    job.run()


def main():
    """Parse CLI arguments, load the cold honeypots and start the job.

    Required arguments: ``--carousel_type`` and ``--date`` (both strings).
    The bad and good cold honeypot tables of the given carousel are read
    from the cluster and converted to Toloka task dicts with fixed known
    solutions before being handed to ``process_data_for_toloka``.
    """
    parser = argparse.ArgumentParser()
    for option in ('--carousel_type', '--date'):
        parser.add_argument(option, type=str, required=True)
    args = parser.parse_args()

    honeypots_dir = '//home/videolog/vika-pavlova/video_recommendations/offline_recomms/cold_honeypots/' + args.carousel_type

    def load_honeypots(table_suffix, labels):
        """Read one honeypot table and wrap each row into a task dict.

        ``labels`` are the accepted answers; the first two get weight 1 and
        the third gets weight 0.5.
        """
        weights = (1, 1, 0.5)
        tasks = []
        for row in cluster.read(honeypots_dir + table_suffix):
            solutions = []
            for weight, label in zip(weights, labels):
                solutions.append({
                    "weight": weight,
                    "outputValues": {
                        "label": label
                    }
                })
            tasks.append({
                "inputValues": {
                    "title": row["title"],
                    "thumbnail": row["thumbnail"],
                    "description": row["description"],
                    "embed_url": row["embed_url"],
                    "ya_video_preview": row["ya_video_preview"]
                },
                "knownSolutions": solutions
            })
        return tasks

    # the bad table is read first, then the good one
    bad_cold_hp = load_honeypots('/bad_cold', ("-2", "-1", "0"))
    good_cold_hp = load_honeypots('/good_cold', ("+2", "1", "0"))

    process_data_for_toloka(args.carousel_type, good_cold_hp, bad_cold_hp, args.date)


# Script entry point: run main() only when the file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
