# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

import requests
import json
# import http.client
import httplib

def _bloggers_cluster(cluster_factory):
    """Return a cluster from `cluster_factory` configured for the baskets jobs.

    Every call builds fresh settings dicts, so the resulting cluster objects
    do not share any mutable configuration.
    """
    base_cluster = cluster_factory(pool='vika-pavlova')
    return base_cluster.env(
        templates=dict(
            job_root='//home/videolog/vika-pavlova/5336-bloggers_baskets',
        ),
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            use_default_tentative_pool_trees=True,
        ),
        parallel_operations_limit=10,
    )


# Hahn cluster: used by the "pop" baskets job and for reading its tables.
cluster_1 = _bloggers_cluster(clusters.yt.Hahn)

# Arnold cluster: used by the "fresh" baskets job and for reading its tables.
cluster_2 = _bloggers_cluster(clusters.yt.Arnold)

class metrics_format():
    """Mapper turning per-blogger records into Metrics basket query records.

    Each input record must provide ``blogger`` (used as the query text) and
    ``videos`` (serialised to JSON and passed as the ``urls`` parameter).
    """

    def __init__(self, device):
        # Basket device name as given by the caller ('desktop', 'touch', ...).
        self.device = device

    def __call__(self, recs):
        """Yield one query Record for every record in `recs`."""
        # Any basket that is not 'desktop' is submitted as an ANDROID query.
        if self.device == 'desktop':
            query_device = 'DESKTOP'
        else:
            query_device = 'ANDROID'

        for rec in recs:
            query_text = rec["blogger"].strip()
            urls_json = json.dumps(rec["videos"])
            yield Record(
                text=query_text,
                # Region and country are fixed for every query of the basket.
                regionId=225,
                country="RU",
                device=query_device,
                params=[
                    {"name": "content_type", "value": 'bloggers'},
                    {"name": "urls", "value": urls_json},
                    {"name": "query_type", "value": 'title'},
                ],
                # The raw basket device name doubles as the second label.
                labels=['blogger', self.device],
            )

def fresh_videos(groups):
    """Reducer: pick the three most recently released videos of each blogger.

    `groups` yields ``(key, recs)`` pairs where ``key.blogger`` is the blogger
    name and every record may carry ``release_date`` (anything ``int()``
    accepts) and ``JoinKey`` (string appended to the player URL).

    Yields one Record per blogger with ``videos`` holding the keys ``top_1``,
    ``top_2`` and ``top_3``, newest first. Slots that could not be filled keep
    ``release_date`` 0 and an empty ``url``. Records with a missing, falsy or
    non-positive ``release_date`` are ignored. When two videos share a
    timestamp, the one seen first keeps the higher slot.
    """
    url_prefix = 'https://frontend.vh.yandex.ru/player/'
    slots = 3

    for key, recs in groups:
        # (release_date, url) pairs ordered newest first; (0, '') is "empty".
        top = [(0, '')] * slots

        for rec in recs:
            raw_date = rec.get("release_date", 0)
            if not raw_date:
                continue
            release_ts = int(raw_date)
            for pos in range(slots):
                if release_ts > top[pos][0]:
                    # Insert at the found rank and drop the last entry so the
                    # previous holders shift down instead of being lost.
                    top.insert(pos, (release_ts, url_prefix + rec["JoinKey"]))
                    top.pop()
                    break

        last_videos = {}
        for rank, (release_ts, url) in enumerate(top, 1):
            last_videos['top_%d' % rank] = {
                "release_date": release_ts,
                "url": url,
            }
        yield Record(blogger=key.blogger, videos=last_videos)

def gather_fresh_baskets():
    """Build the desktop and touch "fresh videos" blogger baskets on cluster_2.

    Steps performed by the job:
      * keep iron_branch rows whose channel starts with "Youtube.", whose
        service flags contain 'ya-video', that are not deleted and whose
        JoinKey consists of digits only;
      * inner-join them with the direct index on ``GroupingUrl``;
      * reduce per blogger with ``fresh_videos``;
      * write a ``random(2000)`` selection as the desktop basket and a
        ``random(2000)`` selection of the remaining bloggers as the touch
        basket (presumably random samples of up to 2000 records; confirm
        against the nile documentation).
    """
    job = cluster_2.job()

    iron_branch = job.table('//home/videolog/strm_meta/iron_branch/concat')

    youtube_filter = sf.custom(
        lambda channel, service_flags, deleted, join_key: (
            channel and channel.startswith("Youtube.")
            and 'ya-video' in service_flags
            and not deleted and join_key.isdigit()
        ),
        'computed_channel', 'TMP_OvsServiceFlags', 'deleted', "JoinKey",
    )
    youtube_videos = iron_branch.filter(youtube_filter).project(
        "release_date", "JoinKey", 'computed_channel',
        blogger=ne.custom(
            lambda channel: channel.replace("Youtube.", ""),
            'computed_channel',
        ),
        # Same URL shape as the direct index uses, so the two tables can join.
        GroupingUrl=ne.custom(
            lambda join_key: 'frontend.vh.yandex.ru/player/' + join_key,
            "JoinKey",
        ),
    )

    index_table = job.table('//home/videoindex/vhs/docbase/dynamic/direct_index')
    indexed_urls = index_table.project('GroupingUrl')

    indexed_videos = indexed_urls.join(youtube_videos, by='GroupingUrl')
    per_blogger = indexed_videos.groupby('blogger').reduce(fresh_videos)

    desktop_sample = per_blogger.random(2000)
    desktop_queries = desktop_sample.map(metrics_format('desktop'))
    desktop_queries.put('$job_root/desktop_fresh_bloggers_basket')

    # 'left_only' keeps bloggers that did not get into the desktop selection.
    touch_candidates = per_blogger.join(
        desktop_sample, by='blogger', type='left_only'
    )
    touch_queries = touch_candidates.random(2000).map(metrics_format('touch'))
    touch_queries.put('$job_root/touch_fresh_bloggers_basket')

    job.run()

def pop_videos(groups):
    """Reducer: pick the three most viewed videos of each blogger.

    `groups` yields ``(key, recs)`` pairs where ``key.blogger`` is the blogger
    name and every record may carry ``views`` (a number) and ``JoinKey``
    (string appended to the player URL).

    Yields one Record per blogger with ``videos`` holding ``top_1`` and, when
    available, ``top_2`` and ``top_3``, most viewed first. Videos with
    missing, zero or negative ``views`` are ignored, and a blogger without a
    single viewed video produces no record. When two videos have the same
    view count, the one seen first keeps the higher slot.
    """
    url_prefix = 'https://frontend.vh.yandex.ru/player/'
    slots = 3

    for key, recs in groups:
        # (views, url) pairs ordered by views descending; (0, '') is "empty".
        top = [(0, '')] * slots

        for rec in recs:
            views = rec.get("views", 0)
            if not views:
                continue
            for pos in range(slots):
                if views > top[pos][0]:
                    # Insert at the found rank and drop the last entry so the
                    # previous holders shift down instead of being lost.
                    top.insert(pos, (views, url_prefix + rec["JoinKey"]))
                    top.pop()
                    break

        top_videos = {}
        for rank, (views, url) in enumerate(top, 1):
            # Only filled slots are reported; each rank gets its own key.
            if views > 0:
                top_videos['top_%d' % rank] = {
                    "views": views,
                    "url": url,
                }
        if top_videos:
            yield Record(blogger=key.blogger, videos=top_videos)

def gather_pop_baskets(start_date, end_date):
    """Build the desktop and touch "popular videos" blogger baskets on cluster_1.

    `start_date` and `end_date` are strings substituted into the
    ``{start..end}`` range of the sessions table path.

    Steps performed by the job:
      * for YouTube channel sessions, count per video the sessions with a
        positive ``view_time_non_muted`` and store them in
        ``$job_root/month_log``;
      * left-join these counts onto the filtered iron_branch rows (missing
        counts become 0) and store the result in ``$job_root/joined``;
      * reduce per blogger with ``pop_videos``;
      * write a ``random(2000)`` selection as the desktop basket and a
        ``random(2000)`` selection of the remaining bloggers as the touch
        basket (presumably random samples of up to 2000 records; confirm
        against the nile documentation).
    """
    job = cluster_1.job()

    sessions_path = ''.join(
        ('//cubes/video-strm/{', start_date, '..', end_date, '}/sessions')
    )
    logs = job.table(sessions_path)

    youtube_sessions = logs.filter(
        sf.custom(
            lambda channel: channel and channel.startswith("Youtube."),
            'channel',
        )
    )
    session_views = youtube_sessions.project(
        "video_content_id", "view_time_non_muted",
        JoinKey='video_content_id',
        blogger=ne.custom(
            lambda channel: channel.replace("Youtube.", ""),
            'channel',
        ),
        # A session counts as one view when it has positive non-muted time.
        view=ne.custom(
            lambda view_time: 1 if view_time > 0 else 0,
            "view_time_non_muted",
        ),
    )
    views_per_video = session_views.groupby('JoinKey').aggregate(
        view=na.sum('view')
    )
    popularity = views_per_video.put('$job_root/month_log')

    iron_branch = job.table('//home/videolog/strm_meta/iron_branch/concat')

    youtube_filter = sf.custom(
        lambda channel, service_flags, deleted, join_key: (
            channel and channel.startswith("Youtube.")
            and 'ya-video' in service_flags
            and not deleted and join_key.isdigit()
        ),
        'computed_channel', 'TMP_OvsServiceFlags', 'deleted', "JoinKey",
    )
    filtered = iron_branch.filter(youtube_filter)

    videos_with_views = filtered.join(popularity, by="JoinKey", type='left')
    joined = videos_with_views.project(
        "JoinKey", 'view', 'computed_channel',
        # Videos without any counted session get 0 views.
        views=ne.custom(lambda view: view if view else 0, 'view'),
        blogger=ne.custom(
            lambda channel: channel.replace("Youtube.", ""),
            'computed_channel',
        ),
    ).put('$job_root/joined')

    per_blogger = joined.groupby('blogger').reduce(pop_videos)

    desktop_sample = per_blogger.random(2000)
    desktop_queries = desktop_sample.map(metrics_format('desktop'))
    desktop_queries.put('$job_root/desktop_pop_bloggers_basket')

    # 'left_only' keeps bloggers that did not get into the desktop selection.
    touch_candidates = per_blogger.join(
        desktop_sample, by='blogger', type='left_only'
    )
    touch_queries = touch_candidates.random(2000).map(metrics_format('touch'))
    touch_queries.put('$job_root/touch_pop_bloggers_basket')

    job.run()

def basket_upload(oauth_token):
    """Upload the four generated basket tables to their Metrics baskets.

    For every basket the table is read from the cluster that produced it
    ('pop' tables from cluster_1, the others from cluster_2), converted to a
    list of query dicts and sent with an HTTP PUT to the query-generator
    endpoint of the corresponding basket id.

    `oauth_token` is sent in the ``Authorization: OAuth ...`` header.

    An empty table is skipped with a message instead of failing on the debug
    print, so the remaining baskets are still uploaded. The server response
    is only printed; its status is not checked.
    """
    # Table name -> Metrics basket id.
    baskets_dict = {"desktop_pop_bloggers_basket": "405147",
                    "touch_pop_bloggers_basket": "405148",
                    "desktop_fresh_bloggers_basket": "404959",
                    "touch_fresh_bloggers_basket": "404960"
                   }
    table_root = '//home/videolog/vika-pavlova/5336-bloggers_baskets/'
    query_fields = ('country', 'device', 'labels', 'params', 'regionId', 'text')

    for key, value in baskets_dict.items():
        cluster = cluster_1 if 'pop' in key else cluster_2

        basket = [
            dict((field, rec[field]) for field in query_fields)
            for rec in cluster.driver.read(table_root + key)
        ]
        if not basket:
            print('Basket table %s is empty, skipping upload' % key)
            continue

        # Show one sample query for a quick visual check.
        print(basket[0])

        my_data = {
            "queries": basket,
            "type": "RAW"
        }
        # NOTE(review): the OAuth token is sent over plain http here; confirm
        # whether the endpoint is also reachable over https.
        url = 'http://metrics.yandex-team.ru/api-qgaas/basket/%s/query-generator?comment=from_apii' % (value)
        r = requests.put(
            url=url,
            headers={'Content-Type': 'application/json',
                     'charset': 'UTF-8',
                     "Authorization": "OAuth " + oauth_token},
            data=json.dumps(my_data).encode('utf-8')
        )
        print(r)

def main():
    """Parse the command line, build both basket families and upload them.

    Required options: ``--start_date`` and ``--end_date`` (passed to
    ``gather_pop_baskets``) and ``--oauth_token`` (passed to
    ``basket_upload``).
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date', '--oauth_token'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    gather_fresh_baskets()
    gather_pop_baskets(options.start_date, options.end_date)
    basket_upload(options.oauth_token)


if __name__ == '__main__':
    main()
