#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    files as nfl,
    with_hints,
    extended_schema,
    multischema,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    filters as sf,
    extractors as se,
    resources as sr
)
from qb2.api.v1.typing import Optional, Json, String

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import datetime
import time
import re
import ast
import urllib
import itertools

# https://hitman.yandex-team.ru/projects/edinoe_izbrannoe/EI_Stats_MMA_1923


@with_hints(
    output_schema=dict(
        notif_type=str,
        notif_service=str,
        ui=str,
        clicks=int,
        path=str,
        uid=str
    )
)
def add_totals(recs):
    """Fan every record out over '_total_' roll-ups of its dimensions.

    For each input record, ``notif_type``, ``notif_service`` and ``ui`` are
    each emitted both with their own value and with the ``'_total_'``
    placeholder, so one input record produces 2 * 2 * 2 = 8 output records.
    ``clicks``, ``path`` and ``uid`` are copied through unchanged.

    :param recs: iterable of records exposing the six attributes above.
    :return: generator of ``Record`` objects matching the declared schema.
    """
    total = '_total_'
    for rec in recs:
        # Dimensions that get an extra "total" variant.
        rolled_up = (
            (rec.notif_type, total),
            (rec.notif_service, total),
            (rec.ui, total),
        )
        # Values carried over as-is into every generated combination.
        clicks, path, uid = rec.clicks, rec.path, rec.uid

        for notif_type, notif_service, ui in itertools.product(*rolled_up):
            yield Record(
                notif_type=notif_type,
                notif_service=notif_service,
                ui=ui,
                clicks=clicks,
                path=path,
                uid=uid
            )

@with_hints(
    output_schema=dict(
        page_from=str,
        page_to=str,
        ui=str,
        notif_type=str,
        clicks=int,
        clong=int,
        overlong=int,
        uid=str
    )
)
def add_totals2(recs):
    """Fan every record out over '_total_' roll-ups of four dimensions.

    For each input record, ``page_from``, ``page_to``, ``ui`` and
    ``notif_type`` are each emitted both with their own value and with the
    ``'_total_'`` placeholder, so one input record produces 2 ** 4 = 16
    output records. ``clicks``, ``clong``, ``overlong`` and ``uid`` are
    copied through unchanged.

    :param recs: iterable of records exposing the eight attributes above.
    :return: generator of ``Record`` objects matching the declared schema.
    """
    total = '_total_'
    for rec in recs:
        # Dimensions that get an extra "total" variant.
        rolled_up = (
            (rec.page_from, total),
            (rec.page_to, total),
            (rec.ui, total),
            (rec.notif_type, total),
        )
        # Values carried over as-is into every generated combination.
        clicks, clong, overlong, uid = rec.clicks, rec.clong, rec.overlong, rec.uid

        for page_from, page_to, ui, notif_type in itertools.product(*rolled_up):
            yield Record(
                page_from=page_from,
                page_to=page_to,
                ui=ui,
                notif_type=notif_type,
                clicks=clicks,
                clong=clong,
                overlong=overlong,
                uid=uid
            )

# https://clubs.at.yandex-team.ru/yt/2642
@cli.statinfra_job
def make_job(job, nirvana, statface_client, options):
    """Assemble the per-day notifier click pipelines and publish them.

    For every date in ``options.dates`` two streams are built and published:

    * redirect clicks + app-session clicks, expanded by ``add_totals`` and
      grouped by ui / path / notif_service / notif_type, go to the
      ``NotifierClicksStatsV2Fixed`` report;
    * redirect user actions + app-session clicks, expanded by
      ``add_totals2`` and grouped by ui / page_from / page_to / notif_type,
      with the ``target_line`` table appended, go to the
      ``NotifierClicksStatsV2Conversion`` report.

    :param job: job object to configure and extend with the pipelines.
    :param nirvana: object whose ``directories[0]`` is used as ``$job_root``.
    :param statface_client: client attached to both Statface reports.
    :param options: object whose ``dates`` attribute lists the date strings
        to process.
    :return: the configured job.
    """
    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(job_root=nirvana.directories[0])
    )

    conversion_report = (
        ns.StatfaceReport()
        .path('Notifier/Counters/NotifierClicksStatsV2Conversion')
        .scale('daily')
        .client(statface_client)
    )

    fixed_report = (
        ns.StatfaceReport()
        .path('Notifier/Counters/NotifierClicksStatsV2Fixed')
        .scale('daily')
        .client(statface_client)
    )

    for day in options.dates:

        # App sessions (metrika data): keep only rows with at least one click.
        app_sessions = job.table("//home/lego/statistics/LEGO-2504/sessions_app_reduce")
        app_sessions = app_sessions.filter(sf.custom(lambda x: x > 0, "clicks"))

        # Shape app sessions like the redirect tables; every dimension is "app".
        app_clicks = app_sessions.project(
            "uid",
            "clicks",
            clong="long_clicks",
            overlong=ne.const(0),
            path=ne.const("notifier.results.click"),
            notif_service=ne.const("app"),
            page_to=ne.const("app"),
            page_from=ne.const("app"),
            ui=ne.const("app"),
            notif_type=ne.const("app")
        )

        # Redirect data for this day: each row counts as a single click.
        redir_clicks = job.table("$job_root/clicks_" + day + "_res")
        redir_clicks = redir_clicks.project(ne.all(), clicks=ne.const(1))
        redir_actions = job.table("$job_root/user_actions_" + day + "_res")
        redir_actions = redir_actions.project(ne.all(), clicks=ne.const(1))

        # Click totals per ui / path / service / type.
        fixed_stats = (
            redir_clicks.concat(app_clicks)
            .map(add_totals)
            .groupby('ui', 'path', 'notif_service', 'notif_type')
            .aggregate(hits=na.sum("clicks"), uids=na.count_distinct("uid"))
            .project(ne.all(), fielddate=ne.const(day))
        )
        fixed_stats.publish(fixed_report, allow_change_job=True)

        target_line = job.table("$job_root/target_line")

        # Conversion totals per ui / page_from / page_to / type; rows with an
        # undefined page or type are dropped before target_line is appended.
        conversion_stats = (
            redir_actions.concat(app_clicks)
            .map(add_totals2)
            .groupby('ui', 'page_from', 'page_to', 'notif_type')
            .aggregate(
                hits=na.sum('clicks'),
                hits_long=na.sum('clong'),
                hits_overlong=na.sum('overlong'),
                uids=na.count_distinct('uid')
            )
            .filter(sf.defined('page_from', 'page_to', 'notif_type'))
            .concat(target_line)
            .project(ne.all(), fielddate=ne.const(day))
        )
        conversion_stats.publish(conversion_report, allow_change_job=True)

    return job


# Script entry point: call cli.run() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    cli.run()

