# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


# Shared YT cluster handle (Arnold) used by every job and driver call below.
# `job_root` is registered as a path template; operations are scheduled in
# the "physical" pool tree with "cloud" as a tentative tree.
cluster = clusters.yt.Arnold(pool='vika-pavlova').env(
    templates={
        'job_root': '//home/videolog/vika-pavlova/4893-preview_metrics',
    },
    yt_spec_defaults={
        'pool_trees': ["physical"],
        'tentative_pool_trees': ["cloud"],
    },
    parallel_operations_limit=10,
)

def find_data(recs):
    """Mapper: extend every record with preview attributes taken from ``attrs``.

    Each element of ``rec["attrs"]`` is read as ``item[0][0]`` (attribute
    name) and ``item[2][0]`` (attribute value).
    NOTE(review): that layout is inferred from the indexing only; confirm
    against the producer of the ``index_<date>`` table.

    Recognised attributes:
      * ``cratio``   - copied as-is into the ``cratio`` field;
      * ``cwidth``   - copied as-is into the ``cwidth`` field;
      * ``previews`` - a JSON document with a ``previews`` list; the
        ``ishighres`` field becomes True when at least one preview in the
        list has a truthy ``ishighres`` value.

    Args:
        recs: iterable of input records.

    Yields:
        Record: the input record plus ``cratio`` (default ``''``),
        ``ishighres`` (always a bool, default ``False``) and ``cwidth``
        (default ``''``).
    """
    for rec in recs:
        cratio = ''
        ishighres = False
        cwidth = ''
        if "attrs" in rec and rec["attrs"]:
            for item in rec["attrs"]:
                attr_name = item[0][0]
                if attr_name == 'cratio':
                    cratio = item[2][0]
                elif attr_name == 'cwidth':
                    cwidth = item[2][0]
                elif attr_name == 'previews':
                    previews = json.loads(item[2][0])
                    # The flag is set when ANY preview is high-res. The
                    # previous code overwrote this result with the first
                    # preview's raw value, which discarded the scan, could
                    # yield None, and raised IndexError on an empty list.
                    if any(preview.get('ishighres')
                           for preview in previews["previews"]):
                        ishighres = True
        yield Record(rec, cratio=cratio, ishighres=ishighres, cwidth=cwidth)

def _preview_metric_aggregators():
    """Return a fresh dict of the metric aggregators used for every report row.

    Keeping a single definition guarantees that per-player rows and the
    ``_total_`` row are computed with exactly the same predicates.
    """
    return dict(
        tvt=na.sum('tvt'),
        shows=na.sum('count'),
        urls_count=na.count(),
        # Distinct GroupingUrl values whose cratio is > 1.
        horizontal_cratio=na.count_distinct(
            "GroupingUrl",
            predicate=nf.custom(lambda x: x and float(x) > 1, 'cratio')),
        # Distinct GroupingUrl values whose cratio is < 1 and non-zero.
        vertical_cratio=na.count_distinct(
            "GroupingUrl",
            predicate=nf.custom(lambda x: x and float(x) < 1 and float(x) != 0, 'cratio')),
        # The stricter of the two predicates that used to exist; it was
        # already applied to every record by the total branch.
        preview_ishighres=na.count_distinct(
            "GroupingUrl",
            predicate=nf.custom(lambda x: x and int(x) == 1, 'ishighres')),
        preview_cwidth=na.count_distinct(
            "GroupingUrl",
            predicate=nf.custom(lambda x: x and int(x) > 1, 'cwidth')),
    )


def prepare_for_stat(date):
    """Build the ``final_<date>`` table with preview metrics for one day.

    Reads ``index_<date>``, enriches it with ``find_data``, maps every
    player outside of youtube / vk / vh / mailru_new to ``'other'`` and
    aggregates the metrics twice: grouped by ``(player, fielddate)`` and
    grouped by ``fielddate`` only (stored with ``player='_total_'``). Both
    results are concatenated and written to ``final_<date>``.

    Args:
        date: date string used as the table suffix and as the ``fielddate``
            value of every output row.
    """
    job = cluster.job()

    raw = job.table(
        '//home/videolog/vika-pavlova/4893-preview_metrics/index_' + date
    ).map(find_data)

    # One shared projection feeds both aggregations, so the two branches
    # cannot drift apart.
    prepared = raw.project(
        ne.all(exclude='player'),
        player=ne.custom(
            lambda x: x if x in ["youtube", "vk", "vh", "mailru_new"] else 'other',
            'player'),
        fielddate=ne.const(date),
    )

    all_reqs = prepared.groupby(
        'player', 'fielddate'
    ).aggregate(**_preview_metric_aggregators())

    total = prepared.groupby(
        'fielddate'
    ).aggregate(
        **_preview_metric_aggregators()
    ).project(
        ne.all(),
        player=ne.const('_total_'),
    )

    job.concat(all_reqs, total).put(
        '//home/videolog/vika-pavlova/4893-preview_metrics/final_' + date)

    job.run()

def put_to_stat(date):
    """Publish the ``final_<date>`` table to the Statface report.

    The report path is ``Video.All/preview_metrics`` with daily scale. The
    Statface token is read from the ``STAT_TOKEN`` environment variable
    (``KeyError`` if it is not set). Publishing is requested with
    ``async_mode=False`` and ``upload_config=False``.

    Args:
        date: date string; suffix of the ``final_`` table to publish.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport().path('Video.All/preview_metrics')
    report = report.scale('daily')
    report = report.client(stat_client)

    report.remote_publish(
        proxy='arnold',
        table_path='//home/videolog/vika-pavlova/4893-preview_metrics/final_' + date,
        async_mode=False,
        upload_config=False,
    )

def main():
    """Command-line entry point: process every day of a date range.

    Requires ``--start_date`` and ``--end_date``. For each day produced by
    ``pd.date_range`` the metrics table is built, published to Statface, and
    then the ``parsed_<date>`` and ``index_<date>`` tables are removed.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # First 10 characters of the timestamp's string form
        # (presumably YYYY-MM-DD).
        day_str = str(day)[:10]

        prepare_for_stat(day_str)
        put_to_stat(day_str)

        # Source tables are dropped only after a successful publish.
        for table_prefix in (
            '//home/videolog/vika-pavlova/4893-preview_metrics/parsed_',
            '//home/videolog/vika-pavlova/4893-preview_metrics/index_',
        ):
            cluster.driver.remove(table_prefix + day_str)

# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
