#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import json
import itertools

# Action names that recode() treats as UGC when the record's service is
# "answers" or "collections" (it checks rec.actiond against ugc2 + ugc3).
ugc2 = [
    "finish.card.comment",
    "finish.card.create",
    "pass.contest.submission",
    "film_sbs",
]
ugc3 = [
    "finish.question.submit",
    "finish.comment.submit",
    "finish.answer.submit",
]

@with_hints(
    output_schema=dict(
        actiontype2=int,
        actiontype=str,
        device=str,
        service=str,
        fielddate=str,
        hits=int,
        flag=int
    )
)
def recode(recs):
    """Re-emit every input record with an added binary UGC marker.

    ``actiontype2`` is 1 when the action counts as UGC and 0 otherwise:

    * for the "answers" and "collections" services, when ``rec.actiond`` is
      listed in the module-level ``ugc2`` / ``ugc3`` action lists;
    * for every other service, when ``rec.actiontype`` is "2" or "3".

    ``fielddate`` is cut down to its first 10 characters; ``actiontype``,
    ``device``, ``service``, ``hits`` and ``flag`` are copied through as is.
    """
    # Concatenate the action lists once per call rather than once per record.
    ugc_actions = ugc2 + ugc3
    for rec in recs:
        if rec.service in ["answers", "collections"]:
            # For these services only the action name decides; actiontype
            # is deliberately not consulted.
            is_ugc = rec.actiond in ugc_actions
        else:
            is_ugc = rec.actiontype in ("2", "3")
        yield Record(actiontype=rec.actiontype, actiontype2=1 if is_ugc else 0,
                     device=rec.device, service=rec.service, hits=rec.hits,
                     fielddate=rec.fielddate[:10], flag=rec.flag)

@with_hints(
    output_schema=dict(
        m1=int,
        m2=int,
        m3=int,
        device=str,
        service=str,
        fielddate=str,
        flag=int
    )
)
def calc_stats_wweights(recs):
    """Reduce each group of records to three weighted hit sums.

    ``recs`` yields ``(key, records)`` pairs; ``key`` carries ``fielddate``,
    ``device``, ``service`` and ``flag``, which are copied to the output.
    Every record contributes ``weight(actiontype) * hits``:

    * ``m1`` - all known action types ("1".."4");
    * ``m2`` - everything except action type "4" (the "no login" metric);
    * ``m3`` - action types "2" and "3" only, with hits additionally
      multiplied by the binary ``actiontype2`` marker (UGC-only metric).

    Records with an unknown action type or non-numeric ``hits`` /
    ``actiontype2`` are skipped, so one malformed row does not fail the
    whole reduce.

    NOTE(review): the weights are floats while the schema declares m1..m3
    as int - confirm the intended column type.
    """
    # Weight applied to hits for each actiontype code.
    coefs = {"1": 1.0, "2": 3.0, "3": 12.0, "4": 15.0}

    for key, records in recs:
        # Running sums instead of per-group lists: same left-to-right
        # addition order as sum(), without holding every value in memory.
        total = 0
        total_nologin = 0
        total_ugc = 0

        for rec in records:
            action = rec.actiontype
            try:
                weight = coefs[action]
                weighted = weight * rec.hits
                weighted_ugc = weight * (rec.hits * rec.actiontype2)
            except (KeyError, TypeError):
                # KeyError: actiontype outside "1".."4";
                # TypeError: missing / non-numeric hits or actiontype2.
                # Narrow on purpose: a bare except would also swallow
                # GeneratorExit / KeyboardInterrupt / SystemExit.
                continue

            total += weighted
            if action != "4":
                total_nologin += weighted
                if action != "1":
                    total_ugc += weighted_ugc

        yield Record(service=key.service, fielddate=key.fielddate,
                     device=key.device, flag=key.flag,
                     m1=total,
                     m2=total_nologin,
                     m3=total_ugc)


@with_hints(
    output_schema=dict(
        m1=int,
        m2=int,
        m3=int,
        device=str,
        service=str,
        fielddate=str
    )
)
def add_totals_v3(recs):
    """Fan every record out into four copies for total roll-ups.

    Each input record is emitted once per combination of
    (its own service | '_total_') x (its own device | '_total_'), with
    fielddate and the m1/m2/m3 metrics carried over unchanged. Any other
    input field (e.g. flag) is not propagated.
    """
    for rec in recs:
        # Read all fields up front, before the first copy is yielded.
        services = (rec.service, '_total_')
        devices = (rec.device, '_total_')
        day = rec.fielddate
        m1, m2, m3 = rec.m1, rec.m2, rec.m3

        # Service varies slowest, device fastest.
        for service_name in services:
            for device_name in devices:
                yield Record(
                    service=service_name,
                    device=device_name,
                    fielddate=day,
                    m1=m1,
                    m2=m2,
                    m3=m3
                )

@with_hints(
    output_schema=dict(
        m1=int,
        m2=int,
        m3=int,
        device=str,
        service=str,
        fielddate=str
    )
)
def replace_ugc(recs):
    """Collapse every service other than collections/answers/sprav into "ugc".

    Device, fielddate and the m1/m2/m3 metrics are passed through untouched.
    """
    # Services that keep their own name; anything else is relabelled.
    standalone = ("collections", "answers", "sprav")
    for rec in recs:
        original = rec.service
        label = original if original in standalone else "ugc"
        yield Record(
            service=label,
            device=rec.device,
            fielddate=rec.fielddate,
            m1=rec.m1,
            m2=rec.m2,
            m3=rec.m3
        )


@cli.statinfra_job
def make_job(job, nirvana, statface_client):
    """Assemble the UGC metric job graph and return the configured job.

    Reads two input tables from ``nirvana.input_tables``, recodes them,
    reduces them to weighted metrics per (fielddate, service, device, flag),
    adds '_total_' roll-ups and publishes three daily Statface reports.
    One of the aggregated streams is also written to
    ``nirvana.output_tables[0]``.
    """
    source_main = nirvana.input_tables[0]
    source_entity = nirvana.input_tables[1]
    destination = nirvana.output_tables[0]

    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root='//home/images/tmp/rkam/MMA-1597'
        )
    )

    def daily_report(path):
        # Daily-scale Statface report bound to the supplied client.
        return (
            ns.StatfaceReport()
            .path(path)
            .scale('daily')
            .client(statface_client)
        )

    def with_totals(stream):
        # Shared tail of all three pipelines: add '_total_' rows, sum the
        # three metrics per (service, device, fielddate) and sort.
        return (
            stream.map(add_totals_v3)
            .groupby('service', 'device', 'fielddate')
            .aggregate(
                metric=na.sum('m1'),
                metric_nologin=na.sum('m2'),
                metric_ugc_only=na.sum('m3')
            )
            .sort('fielddate', 'service', 'device')
        )

    ugc_report = daily_report('Adhoc/rkam/UGC_metricV1')
    logins_report = daily_report('Adhoc/rkam/UGC_metricV1_withLogins')
    entity_report = daily_report('Adhoc/rkam/EntitySearch_UGC_metricV1_withLogins')

    recoded = job.table(source_main).map(recode)
    weighted = (
        recoded
        .groupby('fielddate', 'service', 'device', 'flag')
        .reduce(calc_stats_wweights)
    )
    # NOTE(review): the upstream schemas declare flag as int, yet the
    # predicate compares it with the string 'main' - confirm which is right.
    # Stream names follow the unpacking order of split().
    detailed, main = weighted.split(nf.equals('flag', 'main'))

    with_totals(detailed).publish(ugc_report, allow_change_job=True)

    # This stream is both stored in the output table and published.
    with_totals(main) \
        .put(destination) \
        .publish(logins_report, allow_change_job=True)

    # Second input goes through the same recode/reduce steps, without a split.
    entity_recoded = job.table(source_entity).map(recode)
    entity_weighted = (
        entity_recoded
        .groupby('fielddate', 'service', 'device', 'flag')
        .reduce(calc_stats_wweights)
    )
    with_totals(entity_weighted).publish(entity_report, allow_change_job=True)

    return job


# Script entry point: hand control to the nile CLI runner when executed
# directly (presumably it dispatches to the job registered with
# @cli.statinfra_job - confirm against the nile documentation).
if __name__ == '__main__':
    cli.run()


