# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime, time
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

import requests
import json
# import http.client
import httplib

# Shared YT cluster handle (Hahn), configured with pool='vika-pavlova'.
# It is used below both to create the Nile job (`cluster.job()`) and to read the
# resulting basket tables (`cluster.driver.read(...)`).
# NOTE(review): `job_root` is registered as a path template but no table path in this
# file references it — the paths are spelled out in full; confirm whether it is needed.
# NOTE(review): `parallel_operations_limit=10` presumably caps concurrently running
# YT operations — confirm against the Nile documentation.
cluster = clusters.yt.Hahn(pool='vika-pavlova'
                          ).env(templates=dict(job_root='//home/videolog/vika-pavlova/5125-p1_vh_queries'
                                              ),
                                yt_spec_defaults=dict(pool_trees=["physical"],
                                                      use_default_tentative_pool_trees = True),
                                parallel_operations_limit=10
                                )

def filter_queries(recs):
    """Drop records whose normalized query contains a stop substring.

    The ``NormalizedQuery`` value is converted to text (UTF-8 bytes are decoded,
    text is used as is, anything else goes through ``str``), lower-cased once and
    checked against the stop list. Records without a match are re-emitted as
    ``Record(rec)``.

    :param recs: iterable of records exposing ``rec["NormalizedQuery"]``.
    :return: generator of the records that passed the check.
    """
    # `type(u'')` is `unicode` on Python 2 and `str` on Python 3.
    text_type = type(u'')
    # Built once per call instead of once per record; exact duplicates removed.
    bad_words = (
        u'трейлер', u'ютуб', u'отзывы', u'актер', u'актёр', u'актрис', u'режиссер',
        u'музыка', u'песн', u'саундтрек', u'аудио', u'дата выхода', u'когда выйдут',
        u'где снимали', u'картинк', u'фото', u'скачать', u'торрент', u'расписание',
        u'кинотеатр', u'википеди', u'вики', u'сколько серий', u'сколько сезонов',
        u'похожие', u'слушать', u'персонаж', u'когда выйдет', u'год выпуска',
        u'кто такой', u'артист', u'кто играл', u'где снимался', u'описание',
        u'концовка', u'объяснение', u'ost', u'кратко', u'гоблин', u'сцена', u'постер',
        u'кадр', u'снимал', u'отзыв', u'цитата', u'содержание',
    )
    for rec in recs:
        query = rec["NormalizedQuery"]
        if not isinstance(query, text_type):
            # Calling str() on a unicode object with non-ASCII characters raises on
            # Python 2, so only non-string values are passed through str().
            if not isinstance(query, bytes):
                query = str(query)
            if isinstance(query, bytes):
                query = query.decode('utf-8')
        query = query.lower()
        if not any(word in query for word in bad_words):
            yield Record(rec)

class metrics_format():
    """Mapper that reshapes joined query/content records into basket query rows.

    ``basket_type`` is written to the ``content_type`` param and to the labels;
    it also selects the ``urls`` format ('series' gets the JSON list of content ids,
    anything else a single player URL built from the first content id).
    ``device`` is copied to every output row; 'DESKTOP' maps to the 'desktop' label
    and any other value to 'touch'.
    """

    def __init__(self, basket_type, device):
        self.basket_type = basket_type
        self.device = device

    def __call__(self, recs):
        """Yield one output ``Record`` per input record."""
        for row in recs:
            license_value = row["license"].lower()

            if self.basket_type == 'series':
                urls_value = json.dumps(row["content_ids"])
            else:
                urls_value = "https://frontend.vh.yandex.ru/player/" + str(row["content_ids"][0])

            query_params = [
                {"name": "content_type", "value": self.basket_type},
                {"name": "license", "value": license_value},
                {"name": "urls", "value": urls_value},
                {"name": "ontoid", "value": row["onto_id"]},
                {"name": "query_type", "value": 'query'},
            ]

            if self.device == 'DESKTOP':
                device_label = 'desktop'
            else:
                device_label = 'touch'

            row_labels = [self.basket_type, device_label]
            # The license label is only added when the license is non-empty.
            if row['license']:
                row_labels.append(license_value)

            yield Record(
                text=row["NormalizedQuery"],
                regionId=225,
                country="RU",
                device=self.device,
                params=query_params,
                labels=row_labels,
            )

def reduce_recs(groups):
    """Collapse each ``onto_id`` group into a single record.

    For every ``(key, records)`` pair the reducer collects the distinct
    ``ContentGroupID`` values and remembers the last non-empty ``license`` seen
    (an empty string when none of the records has one).

    :param groups: iterable of ``(key, records)`` pairs; ``key['onto_id']`` is read.
    :return: generator of ``Record(onto_id, content_ids, license)``.
    """
    for group_key, group_rows in groups:
        seen_ids = set()
        last_license = ''
        for row in group_rows:
            seen_ids.add(row['ContentGroupID'])
            # Falsy license values leave the previously remembered one untouched.
            last_license = row['license'] or last_license

        yield Record(
            onto_id=group_key['onto_id'],
            content_ids=list(seen_ids),
            license=last_license,
        )

def _desktop_film_query_ok(x):
    """Return True when the value mentions none of the series markers."""
    text = str(x).decode('utf-8').lower()
    return u'сериал' not in text and u'серия' not in text and u'сезон' not in text


def _touch_film_query_ok(x):
    """Return True when the value does not mention the film marker."""
    return u'фильм' not in str(x).decode('utf-8').lower()


# Directory that receives the generated basket tables.
_BASKET_ROOT = '//home/videolog/vika-pavlova/5125-p1_vh_queries/'

# Values compared against the `license` column; the lower-cased value is part of
# the output table name.
_BASKET_LICENSES = ('Avod', 'Svod', 'Tvod_Est')

# (UI value in the logs and table-name prefix,
#  device handed to metrics_format,
#  predicate used by the film candidate filter)
# NOTE(review): desktop and touch use different marker sets — confirm this is intended.
_BASKET_DEVICES = (
    ('desktop', 'DESKTOP', _desktop_film_query_ok),
    ('touch', 'ANDROID', _touch_film_query_ok),
)

# (OSubType value in the logs, basket type handed to metrics_format and used in
#  the table name)
_BASKET_CONTENT_TYPES = (
    ('Film', 'film'),
    ('Series', 'series'),
)


def _ui_query_stats(job, start_date, end_date, ui):
    """Build the per-(query, ontoid, osubtype) request counts for one UI value."""
    stream = job.table('//home/dict/ontodb/squeezer/{' + start_date + '..' + end_date + '}/web')
    stream = stream.project(
        "Query", "NormalizedQuery", "UI", "ServiceDomRegion", "EntitySearch",
        ontoid=ne.custom(lambda x: x.get("OntoID"), "EntitySearch"),
        ontoid_accept=ne.custom(lambda x: x.get("Accept"), "EntitySearch"),
        otype=ne.custom(lambda x: x.get("OType"), "EntitySearch"),
        osubtype=ne.custom(lambda x: x.get("OSubType"), "EntitySearch"),
    )
    # Keep accepted film/series entity answers on the 'ru' service domain for this UI.
    stream = stream.filter(sf.and_(
        sf.custom(lambda x, y, z, a, b: x and y and z == 'ru' and
                  a == "Film" and b in ["Film", "Series"],
                  'ontoid', 'ontoid_accept', "ServiceDomRegion", 'otype', 'osubtype'),
        sf.equals('UI', ui),
    ))
    stream = stream.groupby("NormalizedQuery", 'ontoid', 'osubtype').aggregate(reqs=na.count())
    return stream.map(filter_queries)


def _top_titles(job, candidates, osubtype):
    """Concatenate the 100 most requested titles of every license."""
    per_license = [
        candidates.filter(sf.and_(sf.equals('osubtype', osubtype),
                                  sf.equals('license', license_name))
                          ).top(100, by='total_reqs')
        for license_name in _BASKET_LICENSES
    ]
    return job.concat(*per_license)


def gather_baskets(start_date, end_date):
    """Build all query baskets and write them to YT.

    For every device (desktop, touch) and content type (film, series) the job
    takes the 100 most requested titles per license, keeps up to 50 top queries
    per title and writes one table per license to
    ``<root>/<device>_query_<license>_<type>_basket``.

    :param start_date: first table name of the ``{start..end}`` range in the log path.
    :param end_date: last table name of that range.
    """
    job = cluster.job()

    logs = {}
    for ui, _device, _film_query_ok in _BASKET_DEVICES:
        logs[ui] = _ui_query_stats(job, start_date, end_date, ui)

    # Non-deleted 'ya-video' content with an onto_id, collapsed to one row per onto_id.
    licensed_content = job.table('//home/videolog/strm_meta/iron_branch/concat'
        ).filter(sf.custom(lambda x, y, z: x == False and 'ya-video' in y and z,
                           'deleted', 'TMP_OvsServiceFlags', 'onto_id')
        ).groupby('onto_id'
        ).reduce(reduce_recs)

    tops = {}
    for ui, _device, _film_query_ok in _BASKET_DEVICES:
        tops[ui] = logs[ui].groupby('ontoid', 'osubtype').aggregate(total_reqs=na.sum('reqs'))

    for osubtype, basket_type in _BASKET_CONTENT_TYPES:
        for ui, device, film_query_ok in _BASKET_DEVICES:
            if osubtype == 'Film':
                # NOTE(review): `tops[ui]` is grouped by ontoid/osubtype only, so
                # NormalizedQuery is presumably not a column of this stream and the
                # predicate may never see real query text — confirm whether this
                # filter was meant to run on the per-query logs instead.
                subtype_filter = sf.and_(sf.equals('osubtype', osubtype),
                                         sf.custom(film_query_ok, "NormalizedQuery"))
            else:
                subtype_filter = sf.equals('osubtype', osubtype)

            candidates = tops[ui].filter(subtype_filter
                ).join(licensed_content, by_left='ontoid', by_right='onto_id')
            top_titles = _top_titles(job, candidates, osubtype)

            # Up to 50 most frequent queries for every selected title.
            final = logs[ui].join(top_titles, by='ontoid'
                ).groupby('ontoid'
                ).top(50, by='reqs')

            for license_name in _BASKET_LICENSES:
                table_name = '%s_query_%s_%s_basket' % (ui, license_name.lower(), basket_type)
                final.filter(sf.equals("license", license_name)
                    ).map(metrics_format(basket_type, device)
                    ).put(_BASKET_ROOT + table_name)

    job.run()

def basket_upload(oauth_token):
    """Upload every generated basket table to its Metrics basket.

    Each table under the results directory is read in full, converted to a list
    of query dicts and sent with one HTTP PUT per basket. The first query and the
    response object are printed for every basket. Empty tables are reported and
    skipped instead of aborting the remaining uploads.

    :param oauth_token: token placed in the ``Authorization: OAuth ...`` header.
    """
    table_root = '//home/videolog/vika-pavlova/5125-p1_vh_queries/'
    # Table name -> id of the basket that receives its queries.
    baskets_dict = {"desktop_query_avod_film_basket": "392729",
                    "desktop_query_svod_film_basket": "392733",
                    "desktop_query_tvod_est_film_basket": "392734",
                    "desktop_query_avod_series_basket": "392876",
                    "desktop_query_svod_series_basket": "392877",
                    "desktop_query_tvod_est_series_basket": "392878",
                    "touch_query_avod_film_basket": "394043",
                    "touch_query_svod_film_basket": "394044",
                    "touch_query_tvod_est_film_basket": "394045",
                    "touch_query_avod_series_basket": "394046",
                    "touch_query_svod_series_basket": "394047",
                    "touch_query_tvod_est_series_basket": "394048",
                   }
    for key, value in baskets_dict.items():
        basket = [
            {
                'country': rec['country'],
                "device": rec["device"],
                "labels": rec["labels"],
                "params": rec["params"],
                "regionId": rec["regionId"],
                "text": rec["text"],
            }
            for rec in cluster.driver.read(table_root + key)
        ]
        if not basket:
            # Previously `basket[0]` raised IndexError here and stopped all uploads.
            print('Skipping %s: table is empty' % key)
            continue
        print(basket[0])
        my_data = {
                "queries": basket,
                "type": "RAW"
        }
        # NOTE(review): the OAuth token travels over plain http here — confirm
        # whether the endpoint is reachable over https.
        url = 'http://metrics.yandex-team.ru/api-qgaas/basket/%s/query-generator?comment=from_apii' %(value)
        # NOTE(review): 'charset' is sent as a separate header rather than as a
        # Content-Type parameter; left unchanged, confirm what the server expects.
        r = requests.put(url = url,
                headers = {'Content-Type': 'application/json',
                        'charset': 'UTF-8',
                        "Authorization" : "OAuth " + oauth_token},
                data = json.dumps(my_data).encode('utf-8')
            )
        print(r)

def main():
    """Parse the command line, build the baskets, then upload them.

    All three options (``--start_date``, ``--end_date``, ``--oauth_token``) are
    required string arguments.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date', '--oauth_token'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    gather_baskets(options.start_date, options.end_date)
    basket_upload(options.oauth_token)

# Run the pipeline only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
