from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


def recs_combination(recs):
    """Mapper that adds ``_total_`` roll-up rows for every input record.

    For each incoming record the three dimensions ``object_item``, ``ui`` and
    ``vh_filter`` are each either kept or replaced by the literal ``'_total_'``,
    giving eight output records per input record.  The metric fields and
    ``date`` are copied to every output record unchanged.

    :param recs: iterable of records exposing ``object_item``, ``ui``,
        ``vh_filter``, ``vh_tvt``, ``other_tvt``, ``vid_clicks``,
        ``other_clicks``, ``reqs`` and ``date`` attributes.
    :return: generator of ``Record`` objects.
    """
    total = '_total_'

    for rec in recs:
        # Values shared by all eight combinations produced from this record.
        carried = dict(
            vh_tvt=rec.vh_tvt,
            other_tvt=rec.other_tvt,
            vid_clicks=rec.vid_clicks,
            other_clicks=rec.other_clicks,
            reqs=rec.reqs,
            date=rec.date
        )

        slices = product(
            (rec.object_item, total),
            (rec.ui, total),
            (rec.vh_filter, total)
        )

        for object_item, ui, vh_filter in slices:
            yield Record(object_item=object_item, ui=ui, vh_filter=vh_filter, **carried)

def parse_oo_data(records):
    """Mapper that extracts the search query and object types from card rows.

    Each input record carries a ``key`` and a JSON string in ``value``.  From
    the decoded JSON this takes:

    * the ``value`` of the first ``SearchRequest`` entry whose ``RelevLocale``
      contains ``'ru'``;
    * the ``value`` of every ``isa.otype`` entry that has one.

    Records with no (or an empty) matching query are skipped.

    :param records: iterable of records with ``key`` and ``value`` attributes.
    :return: generator of ``Record(onto_id, oo_query, ttype)`` where ``ttype``
        is a (possibly empty) list of type values.
    """
    domain = 'ru'

    for r in records:
        card = json.loads(r.value)

        o_type = card.get('isa', {}).get('otype') or []
        o_type_value = [x['value'] for x in o_type if 'value' in x]

        queries = [
            x["value"]
            for x in card.get("SearchRequest", [])
            if domain in x.get("RelevLocale", [])
        ]
        query = queries[0] if queries else None
        # Skip cards without a usable query (no match, or an empty first match).
        if not query:
            continue

        yield Record(onto_id=r.key, oo_query=query, ttype=o_type_value)

def process_data_for_stat(date):
    """Build and run the nile job that prepares the stat table for ``date``.

    Pipeline outline:

    1. Collect ``onto_id`` values of episodes flagged ``is_film`` or
       ``is_series``.
    2. Aggregate the parsed log table for ``date`` by ``ui`` / ``object_id`` /
       ``date``.
    3. Right-join (1) onto (2) and mark every row ``is_vh`` / ``not_vh``
       depending on whether an ``onto_id`` was matched; inner-join the result
       with the parsed ontodb cards.
    4. Keep rows whose first ontodb type contains ``Film``.
    5. Keep the ``top(1000)`` rows by ``vh_tvt + other_tvt`` plus all other
       ``is_vh`` rows; fold everything else into one ``other`` row per
       ``date`` / ``ui``.
    6. Collapse ``ui`` to ``desktop`` / ``touch`` / ``app``, add ``_total_``
       slices via ``recs_combination``, re-aggregate and write
       ``$job_root/final_for_stat_<date>``.

    :param date: string appended to the input log table name and to the
        output table name.
    """
    cluster = clusters.yt.Hahn().env(
        templates=dict(job_root='home/videolog/vika-pavlova/2082-vh_and_clicks_by_onto_id/all_dates'),
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        parallel_operations_limit=10
    )
    job = cluster.job()

    # --- 1. onto_ids that have film/series episodes ------------------------
    episodes = job.table('home/videoindex/full/video_hosting/episodes')
    episodes_with_onto = episodes.filter(sf.defined('onto_id'))
    films_and_series = episodes_with_onto.filter(
        sf.or_(sf.equals('is_film', True), sf.equals('is_series', True))
    )
    # NOTE(review): this sort (and the other intermediate sorts below) looks
    # redundant given the later joins/groupbys -- confirm before removing.
    vh_onto_ids = films_and_series.groupby('onto_id').aggregate(urls=na.count()).sort('urls')

    # --- 2. per-object metrics from the parsed log -------------------------
    log_table = job.table('home/videolog/vika-pavlova/one_us_parse_for_all_reports/2082_2184_vh_' + date)
    log_rows = log_table.project(
        'ui', 'date', 'reqs', 'vh_tvt', 'other_tvt', 'vid_clicks', 'other_clicks',
        # keep only the part of object_id before the first '-'
        object_id=ne.custom(lambda x: x.split('-')[0], 'object_id')
    )
    # NOTE(review): ``reqs`` is recomputed as a row count here although the
    # source table already has a ``reqs`` column -- confirm this is intended.
    per_object = log_rows.groupby('ui', 'object_id', 'date').aggregate(
        reqs=na.count(),
        vh_tvt=na.sum('vh_tvt'),
        other_tvt=na.sum('other_tvt'),
        vid_clicks=na.sum('vid_clicks'),
        other_clicks=na.sum('other_clicks')
    )

    # --- 3. flag objects and attach ontodb data ----------------------------
    joined_columns = ('date', 'object_id', 'reqs', 'vh_tvt', 'vid_clicks',
                      'other_clicks', 'other_tvt', 'ui')
    card_columns = joined_columns + ('vh_filter', 'oo_query', 'ttype')

    flagged = vh_onto_ids.join(
        per_object, by_left='onto_id', by_right='object_id', type='right'
    ).project(
        *joined_columns,
        vh_filter=ne.custom(lambda x: 'is_vh' if x else 'not_vh', 'onto_id')
    )

    onto_cards = job.table(
        'home/dict/ontodb/ver/daily/production/all_cards_final'
    ).map(parse_oo_data, memory_limit=4000)

    with_cards = onto_cards.join(
        flagged, by_left='onto_id', by_right='object_id', type='inner'
    ).project(*card_columns)

    # --- 4. films only ------------------------------------------------------
    typed = with_cards.project(
        ne.all(),
        type_1=ne.custom(lambda x: x[0] if x else '-', 'ttype')
    )
    films = typed.filter(sf.contains('type_1', 'Film')).project(*card_columns)

    # --- 5. top rows, remaining is_vh rows, and the "other" bucket ----------
    prepare_for_top = films.project(
        'date', 'vid_clicks', 'other_clicks', 'vh_tvt', 'other_tvt', 'reqs', 'ui', 'vh_filter',
        object_item=ne.custom(lambda a, b: a + ' ' + b, 'object_id', 'oo_query'),
        total=ne.custom(lambda a, b: a + b, 'vh_tvt', 'other_tvt'),
        negative_total=ne.custom(lambda a, b: (-1)*(a + b), 'vh_tvt', 'other_tvt')
    ).sort('negative_total')

    item_key = ('object_item', 'ui')

    # NOTE(review): ``mode`` receives the builtin ``max`` rather than a
    # string -- verify against the nile ``top`` signature.
    top_rows = prepare_for_top.top(1000, by='total', mode=max).sort('negative_total')

    # is_vh rows that did not make it into the top
    vh_outside_top = prepare_for_top.filter(
        sf.equals('vh_filter', 'is_vh')
    ).join(top_rows, by=item_key, type='left_only').sort('negative_total')

    kept_rows = job.concat(top_rows, vh_outside_top)

    # everything not kept individually is summed into one row per date/ui
    other_bucket = prepare_for_top.join(
        kept_rows, by=item_key, type='left_only'
    ).groupby('date', 'ui').aggregate(
        other_clicks=na.sum('other_clicks'),
        other_tvt=na.sum('other_tvt'),
        reqs=na.sum('reqs'),
        vh_tvt=na.sum('vh_tvt'),
        vid_clicks=na.sum('vid_clicks'),
        total=na.sum('total'),
        negative_total=na.sum('negative_total')
    ).project(
        ne.all(),
        object_item=ne.const('other'),
        vh_filter=ne.const('not_vh')
    )

    combined = job.concat(top_rows, vh_outside_top, other_bucket)

    # --- 6. normalise ui, add totals, write ---------------------------------
    platform_columns = ('date', 'reqs', 'vh_tvt', 'vid_clicks', 'other_clicks',
                        'other_tvt', 'vh_filter', 'object_item')

    by_platform = combined.project(
        *platform_columns,
        ui=ne.custom(lambda x: 'desktop' if 'desktop' in x else x, 'ui')
    )
    by_platform = by_platform.project(
        *platform_columns,
        ui=ne.custom(lambda x: 'touch' if 'touch' in x else x, 'ui')
    )
    by_platform = by_platform.project(
        *platform_columns,
        ui=ne.custom(lambda x: 'app' if 'app' in x else x, 'ui')
    )

    with_totals = by_platform.map(recs_combination, memory_limit=4000)
    for_stat = with_totals.groupby('date', 'object_item', 'ui', 'vh_filter').aggregate(
        reqs=na.sum('reqs'),
        vh_tvt=na.sum('vh_tvt'),
        other_tvt=na.sum('other_tvt'),
        vid_clicks=na.sum('vid_clicks'),
        other_clicks=na.sum('other_clicks')
    )

    # Rename the dimensions to the field names used in the output table.
    report_rows = for_stat.project(
        'reqs', 'vh_tvt', 'vid_clicks', 'other_tvt', 'other_clicks',
        fielddate=ne.custom(lambda a: a, 'date'),
        platform=ne.custom(lambda a: a, 'ui'),
        ontoid=ne.custom(lambda a: a, 'object_item'),
        is_on_vh=ne.custom(lambda a: a, 'vh_filter')
    )
    report_rows.sort('ontoid').put('$job_root/final_for_stat_' + date)

    job.run()


def put_data_to_stat(date):
    """Publish the prepared table for ``date`` to the Statface report.

    Reads the upload token from the ``STAT_TOKEN`` environment variable
    (``KeyError`` if it is not set) and remotely publishes
    ``.../all_dates/final_for_stat_<date>`` to the daily-scale report
    ``Video.All/tvt_vh_share``.

    :param date: string suffix of the table name written by
        ``process_data_for_stat``.
    """
    client = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        token=os.environ['STAT_TOKEN']
    )

    report = ns.StatfaceReport()
    report = report.path('Video.All/tvt_vh_share')
    report = report.scale('daily')
    report = report.client(client)

    source_table = '//home/videolog/vika-pavlova/2082-vh_and_clicks_by_onto_id/all_dates/final_for_stat_' + date

    report.remote_publish(
        proxy='hahn',
        table_path=source_table,
        async_mode=False,
        upload_config=False
    )



def main():
    """Command-line entry point.

    Requires ``--start_date`` and ``--end_date``; for every day in that
    inclusive range (as produced by ``pandas.date_range``) it builds the stat
    table and then uploads it, one day at a time.
    """
    parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        parser.add_argument(flag, type=str, required=True)
    args = parser.parse_args()

    days = pd.date_range(start=args.start_date, end=args.end_date)
    for day in days:
        # YYYY-MM-DD, the date format used in the table names
        date_str = day.strftime('%Y-%m-%d')
        process_data_for_stat(date_str)
        put_data_to_stat(date_str)


# Run the date-range pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
