#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record,
    cli,
    with_hints,
)
from qb2.api.v1 import (
    extractors as qe,
    filters as qf
)
from copy import deepcopy
import datetime
import os
import sys
import json


@with_hints(output_schema=dict(
    uid=str,
    device=str,
    domain=str,
    fielddate=str,
    comment_load=int,
    comment_more=int,
    comment_written=int,
    new_comment_written=int,
    reply_written=int,
    comment_push=int,
    complains=int,
    reaction_count=int,
    reaction_like=int,
    reaction_dislike=int,
    active_commenter=int,
    user_count=int,
    real_user_count=int,
))
def extract_features(groups):
    """Collapse one group of access-log records into activity counters.

    For every ``(key, records)`` pair the counters are accumulated over the
    records of the group and then emitted four times: for the observed
    ``(domain, device)`` pair and for every combination in which either of
    them is replaced by the ``"_total_"`` roll-up value.

    Args:
        groups: iterable of ``(key, records)`` pairs. ``key.to_dict()`` is
            read for ``XYandexICookie`` and ``fielddate``; each record's
            ``to_dict()`` is read for ``Success``, ``Stats``, ``HttpMethod``,
            ``GraphName``, ``ReplyTo`` and ``ReactionType``.

    Yields:
        Record: one record per ``(domain, device)`` combination with the
        fields listed in the ``output_schema`` hint above.
    """
    for key, records in groups:
        comment_load = 0
        comment_more = 0
        comment_written = 0
        comment_push = 0
        new_comment_written = 0
        reply_written = 0
        complains = 0
        reaction_count = 0
        reaction_like = 0
        reaction_dislike = 0
        # Number of requests counted as an "active" action (everything
        # below except the plain GRAPH_TREE load).
        active_action = 0

        # The last non-empty value seen in the group wins for both fields.
        device = "unknown"
        domain = "unknown"

        k = key.to_dict()
        uid = k.get("XYandexICookie", "")
        date = k.get("fielddate", "")

        for record in records:
            rec = record.to_dict()

            # Filtering retries: unsuccessful requests are not counted.
            if not rec.get("Success", False):
                continue

            try:
                stats = json.loads(rec.get("Stats", "{}"))
            except (TypeError, ValueError):
                # TypeError: Stats is present but not a string (e.g. None).
                # ValueError: Stats is not valid JSON.
                stats = {}
            if not isinstance(stats, dict):
                # Valid JSON that is not an object ("null", a list, a
                # number, ...) carries no fields; without this guard the
                # .get() calls below would raise AttributeError.
                stats = {}

            # Records flagged with dnt == 1 are dropped entirely, before
            # they can influence device/domain or any counter.
            if str(stats.get("dnt", "0")) == "1":
                continue
            if stats.get("p"):
                device = stats.get("p")
            if stats.get("l"):
                domain = stats.get("l")

            from_push = stats.get("welcome", "") == "videopreview"

            if rec["HttpMethod"] == "GET" and rec["GraphName"] == "GRAPH_TREE":
                comment_load += 1
            elif rec["HttpMethod"] == "GET" and rec["GraphName"] == "GRAPH_MESSAGE_INFO":
                comment_more += 1
                active_action += 1
            elif rec["HttpMethod"] == "PUT" and rec["GraphName"] == "GRAPH_PUSH":
                comment_written += 1
                active_action += 1
                # A truthy ReplyTo marks a reply; otherwise the comment is
                # counted as a new one.
                if rec["ReplyTo"]:
                    reply_written += 1
                else:
                    new_comment_written += 1
                if from_push:
                    comment_push += 1
            elif rec["HttpMethod"] == "PUT" and rec["GraphName"] == "GRAPH_COMPLAIN":
                complains += 1
                active_action += 1
            elif rec["HttpMethod"] == "PUT" and rec["GraphName"] == "GRAPH_REACTION":
                reaction_count += 1
                active_action += 1
                # 128077 == U+1F44D (thumbs up), 128078 == U+1F44E (thumbs
                # down); any other reaction type only bumps reaction_count.
                if rec["ReactionType"] == 128077:
                    reaction_like += 1
                elif rec["ReactionType"] == 128078:
                    reaction_dislike += 1

        for dom in [domain, "_total_"]:
            for dev in [device, "_total_"]:
                yield Record(
                    uid=uid,
                    device=dev,
                    domain=dom,
                    fielddate=date,
                    comment_load=comment_load,
                    comment_more=comment_more,
                    comment_written=comment_written,
                    new_comment_written=new_comment_written,
                    reply_written=reply_written,
                    comment_push=comment_push,
                    complains=complains,
                    reaction_count=reaction_count,
                    reaction_like=reaction_like,
                    reaction_dislike=reaction_dislike,
                    active_commenter=1 if comment_written > 0 else 0,
                    user_count=1,
                    real_user_count=1 if active_action else 0,
                )


def _publish_social_report(job, date, fielddate_format, report):
    """Build one comment-activity pipeline over the daily log and publish it.

    Args:
        job: nile job the pipeline is attached to.
        date: name of the daily log table, appended to the log directory path.
        fielddate_format: ``strftime`` format used to turn ``event_timestamp``
            into the ``fielddate`` grouping key (this is what selects the
            daily or hourly granularity).
        report: statface report the aggregated rows are published to.

    Returns:
        Whatever ``.publish(report)`` returns for the built stream.
    """
    return job.table("//logs/cmnt-production-cmnt-api-access-log/1d/" + date) \
        .qb2(
            log='generic-yson-log',
            fields=[
                qe.log_field('ApiKey'),
                qe.log_field('UserAction'),
                qe.log_field('GraphName'),
                qe.log_field('HttpMethod'),
                qe.log_field('TestBuckets'),
                qe.log_field('event_timestamp'),
                qe.log_field('YandexUid'),
                qe.log_field('XYandexICookie'),
                qe.log_field('ServiceSlug'),
                qe.log_field('ReactionType'),
                qe.log_field('ReplyTo'),
                qe.log_field('Success'),
                qe.log_field('Stats'),
                # event_timestamp is divided by 10**6 before conversion
                # (presumably it is in microseconds -- TODO confirm).
                # NOTE: fromtimestamp() without tz uses the local time zone
                # of the host that executes the extractor.
                qe.custom(
                    'fielddate',
                    lambda x: datetime.datetime.fromtimestamp(x/1000000).strftime(fielddate_format),
                    'event_timestamp'
                ).with_type(str)
            ],
            filters=[
                qf.defined('XYandexICookie'),
                # Drop placeholder cookie values.
                qf.custom(lambda x: x and x != '0' and x != '-' and x != 'undefined', 'XYandexICookie'),
                qf.equals('ServiceSlug', "video")
            ],
            mode='records',
        ) \
        .groupby('XYandexICookie', 'fielddate') \
        .reduce(extract_features) \
        .groupby('fielddate', 'device', 'domain') \
        .aggregate(
            comment_load=na.sum('comment_load'),
            comment_more=na.sum('comment_more'),
            comment_written=na.sum('comment_written'),
            new_comment_written=na.sum('new_comment_written'),
            reply_written=na.sum('reply_written'),
            comment_push=na.sum('comment_push'),
            complains=na.sum('complains'),
            reaction_count=na.sum('reaction_count'),
            reaction_like=na.sum('reaction_like'),
            reaction_dislike=na.sum('reaction_dislike'),
            active_commenter=na.sum('active_commenter'),
            user_count=na.sum('user_count'),
            real_user_count=na.sum('real_user_count'),
        ) \
        .project(ne.all(), theme=ne.const("unknown")) \
        .publish(report)


@cli.statinfra_job
def make_job(job, options, statface_client):
    """Assemble the job that fills the daily and hourly 'Video.All/social' reports.

    Both reports are computed from the same daily access-log table, selected
    by ``options.dates[0]``; they differ only in the granularity of the
    ``fielddate`` key.

    Args:
        job: nile job to configure and extend.
        options: job options; only ``options.dates[0]`` is read.
        statface_client: client attached to both statface reports.

    Returns:
        The configured job.
    """
    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            use_default_tentative_pool_trees=True
        ),
        templates=dict(
            tmp_root='//tmp',
            title='Social_Video'
        )
    )

    report = ns.StatfaceReport() \
        .path('Video.All/social') \
        .scale('daily') \
        .client(statface_client)
    report_hourly = ns.StatfaceReport() \
        .path('Video.All/social') \
        .scale('hourly') \
        .client(statface_client)

    date = options.dates[0]

    # Same pipeline twice: once keyed by day, once keyed by hour.
    _publish_social_report(job, date, "%Y-%m-%d", report)
    _publish_social_report(job, date, '%Y-%m-%d %H:00:00', report_hourly)

    return job


if __name__ == "__main__":
    # Script entry point: hand control to the nile CLI runner (presumably it
    # dispatches to the job registered with @cli.statinfra_job -- confirm
    # against the nile docs).
    cli.run()
