# -*- coding: utf-8 -*-
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math, cgi
import pandas as pd
from itertools import product
import sys
import os


def recs_combination(recs):
    """Expand every input record into its per-value and '_total_' variants.

    For each record in ``recs`` four output records are yielded: the cross
    product of (``rec.platform``, '_total_') and (``rec.player_id``,
    '_total_'), with platform varying slowest. All remaining fields are
    copied from the source record; the source ``date`` field is emitted
    under the name ``fielddate``.

    :param recs: iterable of records exposing the attributes platform,
        player_id, date, uid, delta, reqid, tvt, url, view, autoplay and
        start_delta.
    :return: generator of ``Record`` objects.
    """
    for source in recs:
        for platform_value in (source.platform, '_total_'):
            for player_value in (source.player_id, '_total_'):
                yield Record(
                    platform=platform_value,
                    player_id=player_value,
                    fielddate=source.date,
                    uid=source.uid,
                    delta=source.delta,
                    reqid=source.reqid,
                    tvt=source.tvt,
                    url=source.url,
                    view=source.view,
                    autoplay=source.autoplay,
                    start_delta=source.start_delta
                )


def process_data_for_stat(date):
    """Compute one day's init/start timing aggregates and store them on YT.

    Steps, as wired below:
      1. read the ``2459_open_init_<date>`` table;
      2. replace a falsy ``player_id`` with 'other';
      3. expand records with ``recs_combination`` (adds '_total_' slices);
      4. over rows whose ``delta`` is not -1, aggregate count, tvt, views and
         mean/median/quantiles of ``delta``;
      5. over rows whose ``start_delta`` is not -1, aggregate the same set for
         ``start_delta`` (columns prefixed with ``start_``) plus the autoplay
         sum;
      6. full-join both aggregates on (platform, player_id, fielddate) and
         write the result to ``$job_root/final_<date>``.

    NOTE(review): -1 looks like a "no measurement" sentinel -- confirm with
    the producer of the input table.

    :param date: date string appended to the input and output table names.
    """
    group_keys = ('platform', 'player_id', 'fielddate')
    quantile_levels = (
        ('q10', 0.1),
        ('q30', 0.3),
        ('q50', 0.5),
        ('q70', 0.7),
        ('q90', 0.9),
        ('q95', 0.95),
    )

    def without_sentinel(stream, field):
        # Keep only the rows where ``field`` differs from -1.
        return stream.filter(sf.not_(sf.equals(field, -1)))

    def quantile_columns(field, prefix):
        # One quantile aggregator per level, keyed by the output column name.
        return dict(
            (prefix + name, na.quantile(field, level))
            for name, level in quantile_levels
        )

    hahn = clusters.yt.Hahn(pool='vika-pavlova')
    cluster = hahn.env(
        templates=dict(job_root='home/videolog/vika-pavlova/2459-init_pleer_time'),
        yt_spec_defaults=dict(pool_trees=["physical"],
                              tentative_pool_trees=["cloud"]),
        parallel_operations_limit=10
    )

    job = cluster.job()

    source_table = job.table(
        '//home/videolog/vika-pavlova/one_us_parse_for_all_reports/2459_open_init_' + date
    )
    normalized = source_table.project(
        ne.all(exclude='player_id'),
        player_id=ne.custom(lambda x: x if x else 'other', 'player_id')
    )
    parsed = normalized.map(recs_combination, memory_limit=4000)

    # Aggregates over the init delta.
    init_rows = without_sentinel(parsed, 'delta')
    init_columns = dict(
        urls=na.count(),
        tvt=na.sum('tvt'),
        views=na.sum('view'),
        mean_delta=na.mean('delta'),
        median_delta=na.median('delta')
    )
    init_columns.update(quantile_columns('delta', ''))
    init_stats = init_rows.groupby(*group_keys).aggregate(**init_columns)

    # Aggregates over the start delta.
    start_rows = without_sentinel(parsed, 'start_delta')
    start_columns = dict(
        start_urls=na.count(),
        start_tvt=na.sum('tvt'),
        start_views=na.sum('view'),
        start_mean_delta=na.mean('start_delta'),
        start_median_delta=na.median('start_delta'),
        autoplay_count=na.sum('autoplay')
    )
    start_columns.update(quantile_columns('start_delta', 'start_'))
    start_stats = start_rows.groupby(*group_keys).aggregate(**start_columns)

    # Full join keeps slices that are present in only one of the aggregates.
    combined = init_stats.join(start_stats, by=group_keys, type='full')
    combined.put('$job_root/final_' + date)

    job.run()


def put_data_to_stat(date):
    """Publish the ``final_<date>`` table to the Statface report.

    Builds a Statface client from the ``STAT_TOKEN`` environment variable and
    remotely publishes the table to the daily-scale report
    'Video.All/open_init_time'.

    :param date: date string appended to the ``final_`` table name.
    :raises KeyError: if ``STAT_TOKEN`` is not set in the environment.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN']
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/open_init_time')
    report = report.scale('daily')
    report = report.client(stat_client)

    final_table = '//home/videolog/vika-pavlova/2459-init_pleer_time/final_' + date
    report.remote_publish(
        proxy='hahn',
        table_path=final_table,
        async_mode=False,
        upload_config=False
    )


def main():
    """Run the calculation and the Statface upload for every day in a range.

    Reads the required ``--start_date`` and ``--end_date`` command-line
    arguments, iterates over ``pd.date_range`` between them and, for each day
    formatted as YYYY-MM-DD, calls ``process_data_for_stat`` followed by
    ``put_data_to_stat``.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        day_str = day.strftime('%Y-%m-%d')
        process_data_for_stat(day_str)
        put_data_to_stat(day_str)


# Script entry point: run the date-range processing only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
