#!/usr/bin/env python
# -*- coding: utf-8 -*-

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns
)

from qb2.api.v1 import filters as sf

import os #obligatory for Statface
import sys #obligatory for Statface


@with_hints(
    output_schema=dict(yandexuid=str, path=str, unixtime=str, platform=str)
)
def myMap(recs):
    """Extract touch-platform events from raw log lines.

    Each input record must expose ``value``: a single line made of
    tab-separated ``key=value`` fields. Lines that contain the substring
    ``cid=73096`` are tagged with platform ``"touch"``; all other lines are
    dropped.

    Lines lacking a non-empty ``yandexuid``, a ``path`` field, or a
    non-empty ``unixtime`` are skipped instead of raising ``KeyError``, so
    a single malformed line cannot fail the whole map stage.

    Yields:
        Record(yandexuid, path, unixtime, platform) for every accepted line.
    """
    for rec in recs:
        line = rec.value
        # Cheap substring pre-filter: avoids parsing lines that would be
        # dropped anyway.
        # NOTE(review): this is a substring match, so it also accepts e.g.
        # "cid=730961" or the text appearing inside another field's value.
        # Confirm whether an exact match on a parsed "cid" field is wanted.
        if "cid=73096" not in line:
            continue

        # Split on the first '=' only, so values may themselves contain '='.
        fields = dict(
            item.split('=', 1) for item in line.split('\t') if '=' in item
        )

        yandexuid = fields.get("yandexuid")
        path = fields.get("path")
        unixtime = fields.get("unixtime")
        # An empty path is still accepted (only a missing one is skipped);
        # yandexuid and unixtime must be present and non-empty.
        if not yandexuid or path is None or not unixtime:
            continue

        yield Record(
            yandexuid=yandexuid,
            path=path,
            unixtime=unixtime,
            platform="touch",
        )


@with_hints(
    output_schema=dict(uid=str, green=int, repin=int, like=int, platform=str)
)
def myReduce(recs):
    """Count per-user actions from that user's time-ordered events.

    ``recs`` yields ``(key, records)`` pairs where ``key.yandexuid`` is the
    user id and every record exposes ``path``, ``unixtime`` and ``platform``.
    Records are expected to arrive sorted by ``unixtime`` ascending (the
    pipeline in ``make_job`` applies ``.sort('unixtime')`` before this
    reducer).

    Counters:
        repin: number of ``finish.card.share`` events.
        like:  number of ``click.card.like`` events.
        green: number of ``click.link_external`` events that are either the
               user's last event or are followed by the next event more
               than 120 seconds later.

    The original in-loop condition compared ``prev_ts - current_ts > 120``,
    which can never hold for ascending timestamps, so only a trailing
    external click was ever counted. The gap is now measured forward from
    the external click to the following event.
    NOTE(review): the "no event within 120 s after the click" meaning is
    inferred from the trailing-click rule — confirm with the metric owner.

    Yields:
        Record(uid, green, repin, like, platform) per non-empty group;
        ``platform`` is taken from the group's last record.
    """
    external_click = "click.link_external"
    min_gap_seconds = 120

    for key, records in recs:
        green, repin, like = 0, 0, 0
        prev_path = None
        prev_ts = None
        last_rec = None

        for rec in records:
            path = rec.path
            current_ts = int(rec.unixtime)

            if path == "finish.card.share":
                repin += 1
            elif path == "click.card.like":
                like += 1

            # The previous event was an external click and nothing happened
            # for more than min_gap_seconds afterwards.
            if (prev_path == external_click
                    and current_ts - prev_ts > min_gap_seconds):
                green += 1

            prev_path, prev_ts = path, current_ts
            last_rec = rec

        # An empty group has nothing to report; the original would have hit
        # unbound (or stale, from a previous key) ``path``/``rec`` here.
        if last_rec is None:
            continue

        # A trailing external click has no following event, so it counts.
        if prev_path == external_click:
            green += 1

        yield Record(
            uid=key.yandexuid,
            green=green,
            repin=repin,
            like=like,
            platform=last_rec.platform,
        )


@with_hints(
    output_schema=dict(cntr=float, platform=str)
)
def calc_metric(recs):
    """Collapse each record's action counters into one weighted score.

    For every input record the score is
    ``green * 0.14 + repin * 0.84 + like * 0.02``; it is emitted as ``cntr``
    together with the record's ``platform``.
    """
    # Weights, positionally matched to (green, repin, like).
    weights = (0.14, 0.84, 0.02)
    for row in recs:
        counts = (row.green, row.repin, row.like)
        score = 0
        for count, weight in zip(counts, weights):
            score += count * weight
        yield Record(cntr=score, platform=row.platform)

@cli.statinfra_job
def make_job(job, nirvana, options, statface_client):
    """Assemble the daily metric pipeline and publish it to a Statface report.

    Reads the redir-log table for the last date in ``options.dates``, maps
    lines to events (``myMap``), reduces them per ``yandexuid`` ordered by
    ``unixtime`` (``myReduce``), scores each user (``calc_metric``), sums
    the scores per platform and publishes ``metric``, ``platform`` and
    ``fielddate`` to the report.

    ``nirvana`` is accepted but not used in this function.

    Returns:
        The configured ``job`` object.
    """
    yt_defaults = dict(
        pool_trees=["physical"],
        tentative_pool_trees=["cloud"]
    )
    path_templates = dict(job_root='home/images/tmp/rkam/MMA-1424')
    job = job.env(yt_spec_defaults=yt_defaults, templates=path_templates)

    report = ns.StatfaceReport()
    report = report.path('ExtData/PDB/CollectionsUsageMetricV1')
    report = report.scale('daily')
    report = report.client(statface_client)

    # Only the last of the requested dates is processed.
    day = options.dates[-1]

    source = job.table('//logs/collections-redir-log/1d/' + day)
    events = source.map(myMap)
    per_user = events.groupby('yandexuid').sort('unixtime').reduce(myReduce)
    scored = per_user.map(calc_metric)
    totals = scored.groupby('platform').aggregate(metric=na.sum('cntr'))
    published = totals.project(
        'metric', 'platform', fielddate=ne.const(day)
    )
    published.publish(report)

    return job


# Script entry point: when run directly, hand control to nile's CLI runner.
# NOTE(review): presumably cli.run() dispatches to the function decorated
# with @cli.statinfra_job — confirm against the nile documentation.
if __name__ == '__main__':
    cli.run()

