# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


# Shared YT (Hahn) cluster handle used by every job built in this script.
cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
    templates={
        # Referenced as `$job_root` in the put() paths of prepare_for_stat.
        'job_root': '//home/videolog/vika-pavlova/5044-kp_subs/report',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'use_default_tentative_pool_trees': True,
    },
    parallel_operations_limit=10,
)

def parse_video_us(recs):
    """Mapper: turn video query records into unified ``service='video'`` records.

    Records whose ``ReqID`` differs from ``serpid`` are dropped. For the rest:

    * ``ui`` is kept if it is 'desktop', 'touch' or 'pad', otherwise 'other';
    * ``category`` is 'film' / 'series' when ``obj_type`` starts with
      "Film/Film/" / "Film/Series/", otherwise 'other';
    * ``vh_tvt`` is the sum of ``tvt`` over ``results`` entries whose
      ``player`` equals "vh" (0 when there are none).
    """
    known_ui = ('desktop', 'touch', 'pad')
    category_by_prefix = (
        ("Film/Film/", 'film'),
        ("Film/Series/", 'series'),
    )

    for rec in recs:
        if rec["ReqID"] != rec["serpid"]:
            continue

        ui = rec['ui'] if rec["ui"] in known_ui else 'other'

        category = 'other'
        if "obj_type" in rec and rec['obj_type']:
            # First matching prefix wins; unmatched types stay 'other'.
            for prefix, label in category_by_prefix:
                if rec['obj_type'].startswith(prefix):
                    category = label
                    break

        vh_tvt = 0
        if "results" in rec and rec["results"]:
            vh_tvt = sum(
                res["tvt"] for res in rec["results"] if res["player"] == "vh"
            )

        yield Record(
            uid=rec["uid"],
            reqid=rec["ReqID"],
            query=rec["query"],
            ui=ui,
            onto_id=rec["ontoid"],
            onto_type=rec['obj_type'],
            category=category,
            vh_tvt=vh_tvt,
            service='video',
        )

def parse_web_us(recs):
    """Mapper: turn web records into unified ``service='web'`` records.

    Only records with ``PageNo == 0`` are kept. For each kept record:

    * ``ui`` is ``UI`` if it is 'desktop', 'touch' or 'pad', otherwise 'other';
    * ``onto_id`` is ``EntitySearch["OntoID"]`` when ``EntitySearch["Accept"]``
      is truthy and the id is non-empty, otherwise '';
    * ``category`` / ``onto_type`` are 'film' / "Film/Film/" or
      'series' / "Film/Series/" when ``OType`` is "Film" and ``OSubType``
      is "Film" / "Series"; otherwise 'other' / ''.

    Yields:
        Record with fields uid, reqid, query, ui, onto_id, onto_type,
        category and service.
    """
    known_ui = ('desktop', 'touch', 'pad')

    for rec in recs:

        if rec["PageNo"] != 0:
            continue

        ui = rec["UI"] if rec["UI"] in known_ui else 'other'

        # The default must be set BEFORE the check: previously the empty
        # default was assigned after it and always wiped the accepted id.
        ontoid = ''
        # Tolerate a missing / null EntitySearch instead of crashing the job.
        entity = rec.get("EntitySearch") or {}
        if entity.get("Accept", False) and entity.get("OntoID", ''):
            ontoid = entity["OntoID"]

        onto_type = ''
        category = 'other'
        if rec.get("OType") == "Film":
            if rec.get("OSubType") == "Film":
                category = 'film'
                onto_type = "Film/Film/"
            elif rec.get("OSubType") == "Series":
                category = 'series'
                onto_type = "Film/Series/"

        yield Record(uid = rec["UID"], reqid = rec["ReqId"], query = rec["Query"],
                     ui = ui, onto_id = ontoid, onto_type = onto_type,
                     category = category, service = 'web'
                    )

def recs_combination(recs):
    """Mapper: fan every record out over all "value or total" slices.

    Each of the five dimensions (service, subscription, category, ui,
    is_on_vh) is emitted both with its own value and with '_total_', so one
    input record yields 2**5 = 32 output records. The remaining fields
    (vh_tvt, uid, fielddate, reqid, is_ott) are copied through unchanged.
    Note that the input ``category`` is emitted as ``query_category``.
    """
    total = '_total_'
    for rec in recs:
        slices = product(
            (rec.service, total),
            (rec.subscription, total),
            (rec.category, total),
            (rec.ui, total),
            (rec.is_on_vh, total),
        )
        for service, subscription, category, ui, is_on_vh in slices:
            yield Record(
                service=service,
                subscription=subscription,
                query_category=category,
                ui=ui,
                is_on_vh=is_on_vh,
                vh_tvt=rec.vh_tvt,
                uid=rec.uid,
                fielddate=rec.fielddate,
                reqid=rec.reqid,
                is_ott=rec.is_ott,
            )

def prepare_for_stat(date):
    """Build and run the job that produces ``$job_root/final_<date>``.

    Pipeline, as wired below:

    1. Load (uid, subscription) pairs for ``date``.
    2. Map video and web logs to a common record shape (``parse_video_us``,
       ``parse_web_us``) and join them with the subscriptions by ``uid``.
    3. Attach view time from the VH sessions cube to the web records.
    4. Join with per-``onto_id`` content metadata to derive ``is_on_vh`` and
       ``is_ott``.
    5. Expand every record into value/'_total_' slices
       (``recs_combination``) and aggregate distinct users, distinct
       requests and summed view time per slice.

    Intermediate tables ``all_logs``, ``final`` and ``recs_combination`` are
    also written under ``$job_root``.

    Args:
        date: day as a string; it is appended verbatim to the input and
            output table paths (the caller passes 'YYYY-MM-DD').
    """

    job = cluster.job()

    # Distinct (uid, subscription) pairs for the day.
    subs = job.table('//home/videolog/vika-pavlova/2394-report_from_redir_log/user_licences_' + date
                    ).project("uid", "subscription"
                             ).unique("uid", "subscription")

    # Join type is left at the library default here -- presumably inner, i.e.
    # only users present in `subs` survive; TODO confirm.
    video_logs = job.table('//home/videolog/24julia/video_queries_cube/' + date
                          ).map(parse_video_us
                               ).join(subs, by = "uid")

    web_logs = job.table('//home/dict/ontodb/squeezer/' + date + '/web'
                        ).map(parse_web_us
                             )

    # VH sessions: view_time is exposed as vh_tvt, UUID as content_uuid, and
    # uid is the yandexuid prefixed with 'y' (None when yandexuid is empty).
    vh_session = job.table('//cubes/video-strm/' + date + '/sessions'
                          ).project('ref_from',
                                    vh_tvt = 'view_time',
                                    content_uuid = 'UUID',
                                    reqid = 'reqid',
                                    uid = ne.custom(lambda x: 'y' + x if x else None, "yandexuid")
                                    )

    # NOTE(review): neither side visibly carries a 'uuid' field (parse_web_us
    # does not emit one and vh_session renames UUID to content_uuid), and both
    # sides carry a non-key 'uid'. Confirm the intended join key(s).
    web_with_tvt = web_logs.join(vh_session, by = ('uuid', 'reqid'), type = 'left'
                                ).join(subs, by = "uid")

    # One metadata row per onto_id; na.any picks an arbitrary value per group.
    concat = job.table('//home/videolog/strm_meta/iron_branch/concat'
                      ).groupby('onto_id'
                               ).aggregate(name = na.any('Name'),
                                           deleted = na.any('deleted'),
                                           ContentGroupID = na.any('ContentGroupID'),
                                           computed_channel = na.any('computed_channel')
                                          )

    all_logs = job.concat(video_logs, web_with_tvt
                         ).put('$job_root/all_logs')

    # Both mappers emit the entity id as `onto_id`, so that is the join key on
    # both sides (the previous left key 'ontoid' does not exist in all_logs).
    final = all_logs.join(concat, by = 'onto_id', type = 'left'
                         ).project(ne.all(exclude='vh_tvt'),
                                   is_on_vh = ne.custom(lambda x: 'on_vh' if x else 'not_on_vh', 'ContentGroupID'),
                                   fielddate = ne.const(date),
                                   # vh_tvt is re-added with falsy values replaced by 0.
                                   vh_tvt = ne.custom(lambda x: x if x else 0, 'vh_tvt'),
                                   is_ott = ne.custom(lambda x: 'yes' if x == "ott" else 'no', "computed_channel")
                                   ).put('$job_root/final')

    # ott_uids counts distinct users with positive view time on "ott" content.
    final.map(recs_combination
             ).put('$job_root/recs_combination').groupby('service', 'subscription', 'query_category', 'ui', 'is_on_vh', 'fielddate'
                       ).aggregate(uid_count = na.count_distinct('uid'),
                                   reqid_count = na.count_distinct('reqid'),
                                   vh_tvt = na.sum('vh_tvt'),
                                   ott_uids = na.count_distinct('uid', predicate=nf.custom(lambda x, y: x == 'yes' and y > 0 , 'is_ott', 'vh_tvt'))
                                  ).put('$job_root/final_' + date)

    job.run()

def put_data_to_stat(date):
    """Publish the aggregated table for ``date`` to the Statface report.

    The token is read from the ``STAT_TOKEN`` environment variable (KeyError
    if it is not set). The table ``.../report/final_<date>`` is published to
    the daily-scale report 'Video.All/user_subs_report' with
    ``async_mode=False`` and ``upload_config=False``.
    """
    statface = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/user_subs_report')
    report = report.scale('daily')
    report = report.client(statface)

    source_table = '//home/videolog/vika-pavlova/5044-kp_subs/report/final_' + date
    report.remote_publish(
        proxy='hahn',
        table_path=source_table,
        async_mode=False,
        upload_config=False,
    )

def main():
    """Command-line entry point: rebuild and publish the report day by day.

    Requires ``--start_date`` and ``--end_date``; every day in that range
    (as generated by ``pd.date_range``) is processed in order: first the
    table is built, then it is published.
    """
    parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        parser.add_argument(flag, type=str, required=True)
    options = parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # Table names carry the date as 'YYYY-MM-DD'.
        day_str = day.strftime('%Y-%m-%d')
        prepare_for_stat(day_str)
        put_data_to_stat(day_str)

# Run the date-range processing only when executed as a script, not on import.
if __name__ == '__main__':
    main()
