#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import uatraits
import urllib
from datetime import datetime as dt, timedelta
import os
import sys

def aggregate_search_stats(groups):
    """Reduce all search-stat records of one uid into a single record.

    `groups` yields `(key, recs)` pairs where `key["uid"]` is the grouping
    key. For every group exactly one `Record` is yielded with:

    * `uid` - the key uid with a leading 'y' removed or, if there is no
      leading 'y', with a leading 'uu/' removed;
    * `browser` / `os_family` - values of the last record in the group
      ("unknown" when the group has no records);
    * `football_clubs_requests` / `special_events_requests` - per-object
      sums over all records;
    * `video_requests_stats` - per-object "timestamps" and "watches",
      combined across records with `+=`;
    * `web_requests_with_video_intent_stats` - per-object values joined
      with `+` (starting from an empty list).
    """
    for key, recs in groups:
        uid = key["uid"]
        # Only the first matching prefix is stripped.
        for prefix in ('y', 'uu/'):
            if uid.startswith(prefix):
                uid = uid[len(prefix):]
                break

        browser = os_family = "unknown"
        club_counts = {}
        event_counts = {}
        video_stats = {}
        intent_stats = {}

        for rec in recs:
            # Later records overwrite earlier ones: the last record wins.
            browser, os_family = rec["browser"], rec["os_family"]

            # Plain per-object counters: add this record's numbers.
            for totals, field in ((club_counts, "football_clubs_requests"),
                                  (event_counts, "special_events_requests")):
                increments = rec[field]
                for object_id in increments:
                    totals[object_id] = totals.get(object_id, 0) + increments[object_id]

            incoming_video = rec["video_requests_stats"]
            for object_id in incoming_video:
                fresh = incoming_video[object_id]
                merged = video_stats.get(object_id)
                if merged is None:
                    # First occurrence: start from this record's values.
                    video_stats[object_id] = {"timestamps": fresh["timestamps"],
                                              "watches": fresh["watches"]}
                else:
                    merged["timestamps"] += fresh["timestamps"]
                    merged["watches"] += fresh["watches"]

            incoming_intent = rec["web_requests_with_video_intent_stats"]
            for object_id in incoming_intent:
                intent_stats[object_id] = intent_stats.get(object_id, []) + incoming_intent[object_id]

        yield Record(
            uid=uid,
            os_family=os_family,
            browser=browser,
            football_clubs_requests=club_counts,
            special_events_requests=event_counts,
            video_requests_stats=video_stats,
            web_requests_with_video_intent_stats=intent_stats,
        )

def aggregate_tv_online_stats(groups):
    """Reduce all TV-online records of one uid into a single record.

    `groups` yields `(key, recs)` pairs keyed by `key["uid"]`. For each
    object id found in any record's `tv_online_stats`, the output keeps the
    "computed_channel" and "computed_program" of the first occurrence and
    the sum of "tvt" over all occurrences. One `Record(uid, tv_online_stats)`
    is yielded per group.
    """
    for key, recs in groups:
        uid = key["uid"]
        merged = {}
        for rec in recs:
            incoming = rec["tv_online_stats"]
            for object_id in incoming:
                entry = incoming[object_id]
                if object_id not in merged:
                    # First time this object is seen: remember its channel
                    # and program and start the tvt total.
                    merged[object_id] = {
                        "computed_channel": entry["computed_channel"],
                        "computed_program": entry["computed_program"],
                        "tvt": entry["tvt"],
                    }
                else:
                    merged[object_id]["tvt"] += entry["tvt"]
        yield Record(uid=uid, tv_online_stats=merged)

class get_search_uids_for_push(object):
    """Mapper that selects uids by their aggregated search requests.

    `config` is a dict; the keys read here are "browsers", "os_families",
    "football_clubs_object_ids", "football_club_request_threshold",
    "special_events_object_ids" and "special_event_request_threshold".

    A record passes when, after the optional browser / os_family filters
    (an empty list disables the filter), at least one of these holds:

    * football clubs are configured, and the requests for configured clubs
      reach the club threshold and are not fewer than the requests for all
      other clubs;
    * some configured special event has at least the event threshold of
      requests.

    For every passing record a `Record(uid=...)` is yielded.
    """

    def __init__(self, config):
        self.config = config

    def __call__(self, recs):
        cfg = self.config
        for rec in recs:
            allowed_browsers = cfg["browsers"]
            if allowed_browsers and rec["browser"] not in allowed_browsers:
                continue
            allowed_os = cfg["os_families"]
            if allowed_os and rec["os_family"] not in allowed_os:
                continue

            # Split football requests into "configured clubs" vs "the rest".
            club_requests = rec["football_clubs_requests"]
            wanted = 0
            other = 0
            for object_id in club_requests:
                amount = club_requests[object_id]
                if object_id in cfg["football_clubs_object_ids"]:
                    wanted += amount
                else:
                    other += amount

            club_hit = (
                len(cfg["football_clubs_object_ids"]) > 0
                and wanted >= cfg["football_club_request_threshold"]
                and wanted >= other
            )

            # Any single configured event reaching the threshold is enough.
            event_hit = any(
                rec["special_events_requests"].get(object_id, 0)
                >= cfg["special_event_request_threshold"]
                for object_id in cfg["special_events_object_ids"]
            )

            if club_hit or event_hit:
                yield Record(uid=rec["uid"])

class get_tv_online_uids_for_push(object):
    """Mapper that selects records by their aggregated TV-online viewing time.

    `config` is a dict; the keys read here are "tv_online_channels",
    "tv_online_program_words" and "tv_online_tvt_threshold".

    For each record, every entry of `rec["tv_online_stats"]` whose
    "computed_channel" contains at least one configured channel is
    considered. Its "tvt" is counted as *matched* when no program words are
    configured or when "computed_program" contains one of them, and as
    *unmatched* otherwise.

    The input record itself is yielded when the matched total reaches the
    threshold and, if program words are configured, the matched total is
    strictly greater than the unmatched total.
    """

    def __init__(self, config):
        self.config = config

    def __call__(self, recs):
        channels = self.config["tv_online_channels"]
        words = self.config["tv_online_program_words"]
        threshold = self.config["tv_online_tvt_threshold"]

        for rec in recs:
            matched_tvt = 0
            unmatched_tvt = 0
            stats = rec["tv_online_stats"]
            for object_id in stats:
                entry = stats[object_id]
                # An object is counted at most once, however many configured
                # channels are substrings of its channel name. Looping over
                # the channels and adding tvt for every hit would count
                # overlapping channel names (e.g. "X" and "X HD") twice.
                if not any(channel in entry["computed_channel"] for channel in channels):
                    continue
                if not words or any(word in entry["computed_program"] for word in words):
                    matched_tvt += entry["tvt"]
                else:
                    unmatched_tvt += entry["tvt"]

            if matched_tvt < threshold:
                continue
            # With program words configured, matching programs must also
            # outweigh the non-matching ones on the selected channels.
            if not words or matched_tvt > unmatched_tvt:
                yield rec

# Table-path prefixes for the per-day input tables. main() appends a
# "%Y-%m-%d" date to each prefix to get the path of one day's table.
UID_SEARCH_STATS_PREFIX = "//home/ether_prod/pushes/mma-2247/web_video_"
UID_TV_ONLINE_STATS_PREFIX = "//home/ether_prod/pushes/mma-2247/tv_online_"
def main():
    """Parse command-line options, then build and run the uid-selection job.

    The job reads `--days_count` daily tables ending at `--date` (format
    "%Y-%m-%d") for both search and TV-online stats, reduces each source per
    uid, filters each with the corresponding mapper, concatenates the two
    results, counts rows per uid, projects `uid` as `install_id`, sorts by
    it and writes the result to `--output_table`.
    """
    # Options in the order they are registered with the parser.
    option_specs = (
        ('--date', dict(type=str, required=True)),
        ('--days_count', dict(type=int, required=True)),
        ('--football_clubs_object_ids', dict(nargs='+', default=[])),
        ('--football_club_request_threshold', dict(type=int, default=1)),
        ('--special_events_object_ids', dict(nargs='+', default=[])),
        ('--special_event_request_threshold', dict(type=int, default=1)),
        ('--tv_online_channels', dict(nargs='+', default=[])),
        ('--tv_online_program_words', dict(nargs='+', default=[])),
        ('--tv_online_tvt_threshold', dict(type=int, default=900)),
        ('--browsers', dict(nargs='+', default=[])),
        ('--os_families', dict(nargs='+', default=[])),
        ('--output_table', dict(type=str, required=True)),
    )
    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # The mappers take their settings as a plain dict keyed by option name.
    config_keys = (
        "football_clubs_object_ids",
        "football_club_request_threshold",
        "special_events_object_ids",
        "special_event_request_threshold",
        "tv_online_channels",
        "tv_online_program_words",
        "tv_online_tvt_threshold",
        "browsers",
        "os_families",
    )
    config = {name: getattr(args, name) for name in config_keys}

    end_date = dt.strptime(args.date, "%Y-%m-%d")

    spec_defaults = dict(pool_trees=["physical"], tentative_pool_trees=["cloud"])
    templates = dict(tmp_root='//tmp', title='GetUidsForPushes')
    cluster = clusters.yt.Hahn().env(
        parallel_operations_limit=10,
        yt_spec_defaults=spec_defaults,
        templates=templates,
    )

    job = cluster.job()

    # One table per day, counting back from end_date (offset 0 is end_date).
    day_suffixes = [
        (end_date - timedelta(offset)).strftime("%Y-%m-%d")
        for offset in range(args.days_count)
    ]
    search_tables = [job.table(UID_SEARCH_STATS_PREFIX + suffix) for suffix in day_suffixes]
    tv_online_tables = [job.table(UID_TV_ONLINE_STATS_PREFIX + suffix) for suffix in day_suffixes]

    search_per_uid = job.concat(*search_tables).groupby('uid').reduce(aggregate_search_stats)
    search_uids = search_per_uid.map(get_search_uids_for_push(config))

    tv_online_per_uid = job.concat(*tv_online_tables).groupby('uid').reduce(aggregate_tv_online_stats)
    tv_online_uids = tv_online_per_uid.map(get_tv_online_uids_for_push(config))

    # A uid selected by both sources appears once, with source_count == 2.
    selected = job.concat(search_uids, tv_online_uids)
    per_uid = selected.groupby('uid').aggregate(source_count=na.count())
    per_uid.project(install_id='uid').sort('install_id').put(args.output_table)
    job.run()

# Run the job only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
