# -*- coding: utf-8 -*-
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math, cgi
import pandas as pd
from itertools import product
import sys
import os


def recs_combination(recs):
    """Fan every input record out into all of its roll-up combinations.

    Each of the three dimensions (genre, ``vh_filter``, ``object_item``) is
    crossed with the ``'_total_'`` marker, so a record carrying N genres
    produces ``(N + 1) * 2 * 2`` output records. The metrics ``vh_tvt``,
    ``other_tvt``, ``reqs`` and the ``date`` are copied onto every one of them
    unchanged.

    :param recs: iterable of records exposing ``genre_list`` (a list),
        ``vh_filter``, ``object_item``, ``vh_tvt``, ``other_tvt``, ``reqs``
        and ``date``.
    :return: generator of ``Record`` objects that carry a single ``genre``
        field in place of ``genre_list``.
    """
    total_marker = '_total_'

    for row in recs:
        # Pass-through values, identical for every combination of this row.
        vh_watch_time = row.vh_tvt
        other_watch_time = row.other_tvt
        requests = row.reqs
        day = row.date

        genre_options = row.genre_list + [total_marker]
        filter_options = [row.vh_filter, total_marker]
        item_options = [row.object_item, total_marker]

        combinations = product(genre_options, filter_options, item_options)
        for genre, vh_filter, object_item in combinations:
            yield Record(object_item=object_item, vh_filter=vh_filter,
                         vh_tvt=vh_watch_time, other_tvt=other_watch_time,
                         reqs=requests, date=day, genre=genre)


def genre_value_change(record):
    """Replace genre identifiers in ``genre_list`` with readable genre names.

    Every identifier found in the mapping below is swapped for its name; an
    unknown identifier, as well as an empty or missing ``genre_list``, maps to
    the fallback name ('другое', i.e. "other"). Names are de-duplicated per
    record. All other fields are copied to the output unchanged.

    :param record: iterable of records exposing ``negative_total``, ``date``,
        ``object_item``, ``reqs``, ``vh_tvt``, ``other_tvt``, ``total``,
        ``vh_filter`` and ``genre_list``.
    :return: generator of ``Record`` objects with the rewritten ``genre_list``.
    """
    fallback_name = 'другое'
    names_by_id = {
        '#ruw1091129': 'ток-шоу',
        '#ruw11267': 'фэнтези',
        '#ruw123186': 'короткометражка',
        '#ruw13849': 'мюзикл',
        '#ruw13990': 'музыка',
        '#ruw267584': 'биография',
        '#ruw3191169': 'игра',
        '#ruw33652': 'мелодрама',
        '#ruw35153': 'детектив',
        '#ruw3780488': 'семейный',
        '#ruw4296': 'мультфильм',
        '#ruw46906': 'ужасы',
        '#ruw53972': 'комедия',
        '#ruw6156498': 'детский',
        '#ruw74051': 'фантастика',
        '#ruw77954': 'документальный',
        '#ruw807374': 'драма',
        '#ruw8153': 'аниме',
        '#ruw83285': 'фильм-нуар',
        '#ruw98599': 'боевик',
        '#ruw98600': 'вестерн',
        '#ruw98603': 'приключения',
        '#ruw98609': 'история',
        '#ruw98610': 'военный',
        '#ruw98642': 'для взрослых',
        '#ruw98644': 'триллер',
        '#ruw2978265': 'музыка',
    }

    for row in record:
        negative_total = row.negative_total
        day = row.date
        item = row.object_item
        requests = row.reqs
        vh_watch_time = row.vh_tvt
        other_watch_time = row.other_tvt
        total_watch_time = row.total
        vh_flag = row.vh_filter

        genre_ids = row.genre_list
        if genre_ids:
            # A set collapses several identifiers that share one name.
            genre_names = {names_by_id.get(genre_id, fallback_name)
                           for genre_id in genre_ids}
        else:
            genre_names = {fallback_name}

        yield Record(negative_total=negative_total, date=day, object_item=item,
                     reqs=requests, vh_tvt=vh_watch_time,
                     other_tvt=other_watch_time, total=total_watch_time,
                     vh_filter=vh_flag, genre_list=list(genre_names))


def process_data_for_stat(date):
    """Build and run the job that prepares the per-date table for the report.

    The job reads the per-date requests table (its path ends with ``date``),
    aggregates requests and watch time per object, joins the result with the
    video-hosting episodes and the film cards, keeps the 1000 objects selected
    by ``top`` plus one aggregated 'other' row, expands every row into its
    genre / vh_filter / object roll-ups and writes the result to
    ``$job_root/final_for_stat_<date>``.

    :param date: date string; it is concatenated into the input and output
        table paths.
    """

    # Cluster handle with the job root template and YT spec defaults.
    cluster = clusters.yt.Hahn(pool='vika-pavlova'
    ).env(templates=dict(job_root='home/videolog/vika-pavlova/2184-genres_tvt_by_ontoid/all_dates'
                        ),
          yt_spec_defaults=dict(pool_trees=["physical"],
                                tentative_pool_trees=["cloud"]),
          parallel_operations_limit=10
         )

    job = cluster.job()

    # Episodes that have an onto_id and at least one of the content-type
    # flags set to True.
    t = job.table('home/videoindex/full/video_hosting/episodes'
                 ).filter(sf.defined('onto_id')
                         ).filter(sf.or_(sf.equals('is_film', True),
                                         sf.equals('is_series', True),
                                         sf.equals('is_tv_show', True),
                                         sf.equals('is_fragment', True),
                                         sf.equals('is_music', True)
                                        )
                                 )

    # One row per onto_id with the number of matching episode rows.
    vh_ontoids = t.groupby('onto_id'
                          ).aggregate(urls=na.count()
                                     ).sort('urls')

    # Per-date input table; only the listed columns are kept.
    reqs = job.table('home/videolog/vika-pavlova/one_us_parse_for_all_reports/2082_2184_vh_' + date
                    ).project('ui', 'q', 'object_id', 'vh_tvt', 'other_tvt', 'date')


    # Row count and watch-time sums per (object_id, date).
    aggr = reqs.groupby('object_id', 'date').aggregate(reqs=na.count(),
                                                       vh_tvt=na.sum('vh_tvt'),
                                                       other_tvt=na.sum('other_tvt'),
                                                      )

    # vh_filter is 'is_vh' when onto_id is truthy, 'not_vh' otherwise.
    # NOTE(review): presumably the right join keeps aggregated objects that
    # have no episode match and leaves onto_id empty for them - confirm
    # against the nile join semantics.
    join_onto_ids = vh_ontoids.join(aggr, by_left = 'onto_id', by_right = 'object_id', type ='right'
                                   ).project('date', 'object_id', 'reqs', 'vh_tvt', 'other_tvt',
                                             vh_filter = ne.custom(lambda x: 'is_vh' if x else 'not_vh', 'onto_id')
                                            )

    t1 = job.table('home/videolog/vika-pavlova/2184-genres_tvt_by_ontoid/film_cards')

    # Inner join: only objects present in film_cards go further.
    final = t1.join(join_onto_ids, by_left = 'onto_id', by_right = 'object_id', type ='inner'
                   ).project('date', 'object_id', 'reqs', 'vh_tvt', 'other_tvt','vh_filter', 'oo_query',
                             'ttype', 'genre_list'
                            )

    # object_item is "<object_id> <oo_query>"; total is vh_tvt + other_tvt and
    # negative_total is the same sum with the opposite sign (used as sort key).
    prepare_for_top = final.project('date', 'vh_tvt', 'other_tvt', 'reqs', 'vh_filter', 'genre_list',
                                    object_item=ne.custom(lambda a, b: a + ' ' + b, 'object_id', 'oo_query'),
                                    total=ne.custom(lambda a, b: a + b, 'vh_tvt', 'other_tvt'),
                                    negative_total=ne.custom(lambda a, b: (-1)*(a + b),'vh_tvt', 'other_tvt')
                                   ).sort('negative_total')

    # Replace genre identifiers with readable names.
    prepare_for_top_with_genre = prepare_for_top.map(genre_value_change, memory_limit=4000
                                                    )

    # t1 is rebound here: from now on it is the top-1000 table, not film_cards.
    # NOTE(review): mode receives the builtin function `max`, not the string
    # 'max' - confirm that top() accepts it and selects the largest totals.
    t1 = prepare_for_top_with_genre.sort('negative_total'
                                        ).top(1000, by = 'total', mode=max
                                             ).sort('negative_total'
                                                   )

    # Rows that did not make it into t1, summed per date into a single row
    # labelled 'other'.
    t2 = prepare_for_top_with_genre.join(t1, by=('object_item', 'date'), type='left_only'
                                        ).groupby('date').aggregate(other_tvt=na.sum('other_tvt'),
                                                                    reqs=na.sum('reqs'),
                                                                    vh_tvt=na.sum('vh_tvt'),
                                                                    total=na.sum('total'),
                                                                    negative_total=na.sum('negative_total')
                                                                   ).project(ne.all(),
                                                                             object_item=ne.const('other'),
                                                                             vh_filter=ne.const('other'),
                                                                             genre_list=ne.const(['другое'])
                                                                            )

    top = job.concat(t1, t2)

    # Expand into roll-up combinations, sum the metrics per combination and
    # expose date / object_item / vh_filter as fielddate / ontoid / is_on_vh.
    # NOTE(review): the final sort uses 'object_item', which is not among the
    # columns produced by the preceding project - confirm the intended key
    # ('ontoid' may be meant).
    top.map(recs_combination, memory_limit=4000
            ).groupby('date', 'object_item', 'vh_filter', 'genre'
                    ).aggregate(reqs=na.sum('reqs'),
                                vh_tvt=na.sum('vh_tvt'),
                                other_tvt=na.sum('other_tvt')
                                ).project('reqs','vh_tvt', 'other_tvt', 'genre',
                                          fielddate=ne.custom(lambda a: a, 'date'),
                                          ontoid=ne.custom(lambda a: a, 'object_item'),
                                          is_on_vh=ne.custom(lambda a: a, 'vh_filter')
                                        ).sort('object_item').put('$job_root/final_for_stat_' + date)

    job.run()


def put_data_to_stat(date):
    """Publish the per-date result table to the 'Video.All/Genre share' report.

    The Statface token is read from the ``STAT_TOKEN`` environment variable;
    a missing variable raises ``KeyError``.

    :param date: date string; it is appended to the source table path.
    """
    stat_client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN'],
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/Genre share')
    report = report.scale('daily')
    report = report.client(stat_client)

    source_table = ('//home/videolog/vika-pavlova/2184-genres_tvt_by_ontoid/'
                    'all_dates/final_for_stat_' + date)
    report.remote_publish(
        proxy='hahn',
        table_path=source_table,
        async_mode=False,
        upload_config=False,
    )


def main():
    """Process and publish every day in the requested inclusive date range.

    Command-line arguments (both required): ``--start_date`` and
    ``--end_date``. For each day produced by ``pd.date_range`` the data is
    first prepared and then uploaded, one day at a time.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # The first ten characters of the timestamp text are YYYY-MM-DD.
        day_str = str(day)[:10]
        process_data_for_stat(day_str)
        put_data_to_stat(day_str)


# Run the date-range driver only when the file is executed as a script.
if __name__ == '__main__':
    main()
