#!/usr/bin/env python
# -*- coding: utf-8 -*-

# https://st.yandex-team.ru/EXPERIMENTS-21325

from nile.api.v1 import (
    Record,
    files,
    clusters,
    cli,
    with_hints,
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns #obligatory for Statface
)
from qb2.api.v1 import (
    QB2,
    resources as sr,
    filters as qf
)

import os #obligatory for Statface
import sys #obligatory for Statface
import re #obligatory for Statface
import argparse #obligatory for Statface
import getpass #obligatory for Statface
import datetime
import time
import re


@with_hints(
    output_schema=dict(
        page_id=str,
        click=int,
        show=int,
        cost=float,
        testid=str
    )
)
def add_totals(recs):
    """Duplicate each record under the synthetic page id ``"_total_"``.

    For every incoming record the mapper emits the record itself, unchanged,
    followed by a new ``Record`` that carries the same ``click``, ``show``,
    ``cost`` and ``testid`` values but has ``page_id`` set to ``"_total_"``.
    Grouping the output by ``page_id`` and ``testid`` afterwards therefore
    produces an extra "_total_" row per testid next to the per-page rows.
    """
    for row in recs:
        # Pass the original record through first ...
        yield row
        # ... then emit its counterpart that feeds the overall total.
        total_row = Record(
            page_id="_total_",
            click=row.click,
            show=row.show,
            cost=row.cost,
            testid=row.testid
        )
        yield total_row


def parse_from_path(s):
    """Split a '/'-separated path into its parent part and last component.

    Returns a two-element list ``[parent, last]``. Everything before the
    final '/' goes into ``parent`` (an empty string when the path contains
    no '/'), and everything after it goes into ``last``.
    """
    parent, _separator, last = s.rpartition('/')
    return [parent, last]


@cli.statinfra_job(options=[cli.Option('test_ids', default='?')])
def make_job(job, nirvana, options):
    """Assemble the job that sums show/click/cost per page and per bucket.

    Steps, in the order they are added to the job:

    1. Take ``<table>_0uids`` and ``<table>_1money`` from the temporary
       folder next to the output table and join them by ``uid`` and
       ``date`` (stored as ``<table>_2users_with_money``).
    2. Sum ``show``, ``click`` and ``cost`` per ``page_id``/``testid``/
       ``bucket`` (stored as ``<table>_3statb``).
    3. Add "_total_" rows via ``add_totals``, re-aggregate per
       ``page_id``/``testid``, sort, and write the final output table.
    4. For every requested test id, sum ``cost`` per ``testid``/``bucket``
       and store the concatenation as ``<table>_buckets``.

    Args:
        job: job object passed in by ``cli.statinfra_job``.
        nirvana: object whose ``output_tables[0]`` is the full path of the
            final output table.
        options: parsed options; ``options.test_ids`` is a comma-separated
            list of test ids (spaces are ignored), ``'?'`` when not given.

    Returns:
        The configured job object.
    """
    output_table = nirvana.output_tables[0]
    # Split the path once instead of parsing the same string twice.
    output_folder, table_name = parse_from_path(output_table)

    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"]
        ),
        templates=dict(
            job_root=output_folder,
            tmp_files=output_folder + "/temporary"
        )
    )

    testids = options.test_ids
    if testids == "?":
        # sys.stderr.write behaves the same on Python 2 and 3, whereas the
        # old ``print >> sys.stderr`` statement raises TypeError on Python 3.
        # NOTE(review): the job still continues with the '?' placeholder, so
        # the per-bucket step filters on testid '?' -- confirm whether the
        # job should abort here instead.
        sys.stderr.write('wrong testids\n')

    # Common prefix of every intermediate table belonging to this run.
    tmp_prefix = "$tmp_files" + "/" + table_name

    uids = job.table(tmp_prefix + "_0uids")
    midresult = job.table(tmp_prefix + "_1money")
    midresult_j = midresult.join(uids, by=('uid', 'date')) \
        .put(tmp_prefix + "_2users_with_money")

    result = midresult_j.groupby('page_id', 'testid', 'bucket') \
        .aggregate(
            show=na.sum('show'),
            click=na.sum('click'),
            cost=na.sum('cost')
        ).put(tmp_prefix + "_3statb")

    # add_totals duplicates every row under page_id "_total_", so this
    # group-by also yields one overall row per testid.
    result.map(add_totals) \
        .groupby('page_id', 'testid') \
        .aggregate(
            show=na.sum('show'),
            click=na.sum('click'),
            cost=na.sum('cost')
        ) \
        .sort('page_id', 'testid') \
        .put(output_table)

    my_testids = testids.replace(" ", "").split(",")

    # One per-bucket cost stream per requested test id, concatenated into a
    # single table.
    per_testid_buckets = [
        result.filter(nf.equals('testid', test_id))
        .groupby('testid', 'bucket')
        .aggregate(money=na.sum('cost'))
        for test_id in my_testids
    ]
    job.concat(*per_testid_buckets) \
        .put(tmp_prefix + "_buckets")

    return job


# Script entry point: only when the file is executed directly (not imported),
# hand control to cli.run().
# NOTE(review): presumably cli.run() dispatches to the job registered with
# @cli.statinfra_job above -- confirm against the Nile CLI documentation.
if __name__ == '__main__':
    cli.run()

