# -*- coding: utf-8 -*-


from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os

# Shared YT cluster handle (Hahn) used by every job built in this script.
# The `job_root` template is what `$job_root` expands to in output paths.
cluster = clusters.yt.Hahn().env(
    templates={'job_root': 'home/videolog/vika-pavlova/catchups'},
    yt_spec_defaults={
        'pool_trees': ['physical'],
        'tentative_pool_trees': ['cloud'],
    },
    parallel_operations_limit=10,
)

def channels(records):
    """Mapper: re-emit every record with a `channel` field added.

    The channel is cut out of `ComputedName`: the name is split on '.',
    and the channel spans from a part containing "Телеканал" up to (not
    including) the first following part that consists only of digits.
    Only parts other than the last two are considered as a starting point;
    if several parts match, the last match wins.

    `channel` is computed from scratch for each record. When no part
    matches it is None (previously the value leaked from the preceding
    record, or the mapper failed with UnboundLocalError on the first one).
    NOTE(review): downstream grouping will then see a None channel for such
    records -- confirm that this is acceptable for the report.

    :param records: iterable of records exposing ComputedName, start_time,
        corrected_start_time, test_start_time, result, time_to_evaluate.
    :return: generator of Record with the same fields plus `channel`.
    """
    for rec in records:
        computed_name = rec.ComputedName
        parts = computed_name.split('.')
        total_parts = len(parts)

        # Reset per record so a previous record's channel is never reused.
        channel = None

        for start, part in enumerate(parts[:-2]):
            if "Телеканал" not in part:
                continue

            # Bounded scan: if no all-digit part follows, take everything
            # up to the end of the name instead of raising IndexError.
            end = start + 1
            while end < total_parts and not parts[end].strip().isdigit():
                end += 1

            channel = '.'.join(parts[start:end]).strip()

        yield Record(
            ComputedName=computed_name,
            start_time=rec.start_time,
            corrected_start_time=rec.corrected_start_time,
            test_start_time=rec.test_start_time,
            result=rec.result,
            time_to_evaluate=rec.time_to_evaluate,
            channel=channel
        )

def recs_combination(recs):
    """Mapper: fan each record out into every "value or total" combination.

    For the three dimensions channel, time_to_evaluate and ComputedName,
    each input record is emitted once per combination of its own value and
    the '_total_' placeholder (2 * 2 * 2 = 8 output records), all carrying
    the input record's `result`. ComputedName is emitted as `computed_name`.

    :param recs: iterable of records exposing result, channel,
        time_to_evaluate and ComputedName.
    :return: generator of Record(channel, time_to_evaluate, result,
        computed_name).
    """
    total = '_total_'

    for rec in recs:
        outcome = rec.result
        channel_keys = (rec.channel, total)
        time_keys = (rec.time_to_evaluate, total)
        name_keys = (rec.ComputedName, total)

        # Nested loops in this order reproduce itertools.product ordering.
        for channel_key in channel_keys:
            for time_key in time_keys:
                for name_key in name_keys:
                    yield Record(
                        channel=channel_key,
                        time_to_evaluate=time_key,
                        result=outcome,
                        computed_name=name_key
                    )

def process_data_for_stat(date):
    """Build and run the job that aggregates one day's results for the report.

    Reads `//home/videolog/vika-pavlova/catchups/results_<date>`, attaches a
    channel to every row, expands rows into value/'_total_' combinations and
    counts rows per result category for each
    (computed_name, channel, time_to_evaluate) group. The output is written
    to `$job_root/final_for_stat` with `fielddate` set to `date`.

    :param date: date string appended to the input table name and stored
        as `fielddate`.
    """
    outcomes = (
        'exact',
        'later',
        'no_play',
        'earlier',
        'another_telecast',
        'no_telecast',
    )
    group_keys = ('computed_name', 'channel', 'time_to_evaluate')

    def pick_result(output_values):
        return output_values['result']

    def which_start_time(test_start, start):
        # Label which timestamp the test was run against.
        if test_start == start:
            return 'start_time'
        return 'corrected_start_time'

    def is_outcome(expected):
        # Factory, so each predicate captures its own category name.
        def check(value):
            return value == expected
        return check

    def zero_if_empty(value):
        # Any falsy count (e.g. a missing one) is reported as 0.
        return value if value else 0

    job = cluster.job()

    source = job.table('//home/videolog/vika-pavlova/catchups/results_' + date)

    prepared = source.project(
        'ComputedName', 'start_time', 'corrected_start_time', 'test_start_time',
        result=ne.custom(pick_result, 'outputValues'),
        time_to_evaluate=ne.custom(which_start_time, 'test_start_time', 'start_time')
    )

    with_channel = prepared.map(channels, memory_limit=4000)
    expanded = with_channel.map(recs_combination, memory_limit=4000)

    counters = dict(
        (name, na.count(predicate=nf.custom(is_outcome(name), 'result')))
        for name in outcomes
    )
    counted = expanded.groupby(*group_keys).aggregate(**counters)

    zero_filled = dict(
        (name, ne.custom(zero_if_empty, name))
        for name in outcomes
    )
    report_rows = counted.project(
        *group_keys,
        fielddate=ne.const(date),
        **zero_filled
    )
    report_rows.put('$job_root/final_for_stat')

    job.run()

def put_data_to_stat():
    """Publish the aggregated table to the Statface report.

    Uploads `//home/videolog/vika-pavlova/catchups/final_for_stat` from the
    `hahn` proxy into the daily-scale report `Video.All/catchups_accuracy`,
    synchronously and without uploading a report config.

    :raises KeyError: if the STAT_TOKEN environment variable is not set.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN']
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/catchups_accuracy')
    report = report.scale('daily')
    report = report.client(stat_client)

    report.remote_publish(
        proxy='hahn',
        table_path='//home/videolog/vika-pavlova/catchups/final_for_stat',
        async_mode=False,
        upload_config=False
    )

def main():
    """Command-line entry point.

    Requires `--start_date` and `--end_date`. For every day in that
    inclusive range, the day's data is aggregated and then published;
    publishing happens after each day because the aggregation overwrites
    the same output table.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        day_str = day.strftime('%Y-%m-%d')
        process_data_for_stat(day_str)
        put_data_to_stat()

# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
