# -*- coding: utf-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


def _aggregate_factors_by_uuid(stream, tvt_name, lvt_name, uids_name):
    """Roll a projected sessions stream up to one row per (fielddate, UUID).

    The roll-up is done in two stages:

    1. per (fielddate, UUID, vsid): sum ``view_time`` into ``tvt``, take the
       maximum of ``yu_hash``, ``is_start`` and ``is_hb``, then add ``lvt`` --
       the natural logarithm of ``tvt`` (0 when ``tvt`` is falsy);
    2. per (fielddate, UUID): sum ``tvt`` and ``lvt`` and count distinct
       ``yu_hash`` values (all of them, only those with ``has_hb == 1``, and
       only those with ``has_start == 1``).

    Finally a ``url`` column is added by prefixing the UUID with
    ``frontend.vh.yandex.ru/player/``.

    :param stream: stream that has the columns ``fielddate``, ``UUID``,
        ``vsid``, ``view_time``, ``yu_hash``, ``is_start`` and ``is_hb``.
    :param tvt_name: output column name for the summed ``tvt``.
    :param lvt_name: output column name for the summed ``lvt``.
    :param uids_name: output column name for the distinct ``yu_hash`` count;
        the two conditional counts are named ``<uids_name>_with_hb`` and
        ``<uids_name>_with_start``.
    :return: the aggregated stream.
    """
    per_session = stream.groupby('fielddate', 'UUID', 'vsid').aggregate(
        tvt=na.sum('view_time'),
        yu_hash=na.max('yu_hash'),
        has_start=na.max('is_start'),
        has_hb=na.max('is_hb')
    ).project(
        ne.all(),
        # log(0) is undefined, so a falsy tvt contributes 0 to the log sum.
        lvt=ne.custom(lambda x: math.log(x, math.e) if x else 0, 'tvt')
    )

    # Output column names differ between callers, so the aggregators are
    # collected in a dict and passed as keyword arguments.
    aggregators = {
        tvt_name: na.sum('tvt'),
        lvt_name: na.sum('lvt'),
        uids_name: na.count_distinct('yu_hash'),
        uids_name + '_with_hb': na.count_distinct(
            'yu_hash',
            predicate=nf.custom(lambda x: x == 1, 'has_hb')
        ),
        uids_name + '_with_start': na.count_distinct(
            'yu_hash',
            predicate=nf.custom(lambda x: x == 1, 'has_start')
        ),
    }

    return per_session.groupby('fielddate', 'UUID').aggregate(
        **aggregators
    ).project(
        ne.all(),
        url=ne.custom(lambda x: 'frontend.vh.yandex.ru/player/' + x, 'UUID')
    )


def process_data(date):
    """Compute per-UUID viewing factors for one day and append them to the output table.

    Reads ``//cubes/video-strm/<date>/sessions``, keeps rows with a defined
    ``UUID`` and derives two flags per row:

    * ``is_start`` -- 1 when ``'start'`` is found in ``add_info["sources_aggr"]``;
    * ``is_hb`` -- 1 when ``view_time >= 30``.

    Two aggregates per (fielddate, UUID) are then built: one over all rows
    (``total_*`` columns) and one over rows whose ``ref_from`` is not one of
    ``yavideo`` / ``ottwidget_ya-video`` / ``ottwidget_yavideo``
    (``*_wo_video`` columns). The second is left-joined onto the first and
    the result is appended to ``home/videolog/VH_FACTORS/vh_factors_table``.

    :param date: date string inserted verbatim into the source table path
        (``main`` passes ``YYYY-MM-DD``).
    """
    # NOTE(review): `job_root` is registered as a template, but none of the
    # paths below reference it -- confirm whether the output path is meant
    # to be relative to it.
    cluster = clusters.yt.Hahn(pool='vika-pavlova').env(
        templates=dict(job_root='home/videolog/vika-pavlova/3743-vh_factors'),
        yt_spec_defaults=dict(pool_trees=["physical"],
                              tentative_pool_trees=["cloud"]),
        parallel_operations_limit=10
    )

    job = cluster.job()

    # NOTE(review): the `is_start` extractor assumes `add_info` is a mapping
    # that contains "sources_aggr" on every row -- confirm against the cube
    # schema.
    raw = job.table('//cubes/video-strm/' + date + '/sessions').project(
        'fielddate', 'yu_hash', 'view_time', 'UUID', 'vsid', 'ref_from',
        is_start=ne.custom(lambda x: 1 if 'start' in x["sources_aggr"] else 0,
                           'add_info'),
        is_hb=ne.custom(lambda x: 1 if x >= 30 else 0, 'view_time')
    ).filter(sf.defined('UUID'))

    # Factors over every row of the day.
    totals = _aggregate_factors_by_uuid(
        raw, 'total_tvt', 'total_lvt', 'total_uids'
    )

    # The same factors, excluding rows that came from the listed ref_from values.
    without_video = _aggregate_factors_by_uuid(
        raw.filter(
            sf.custom(
                lambda x: x not in ["yavideo", "ottwidget_ya-video", "ottwidget_yavideo"],
                'ref_from'
            )
        ),
        'tvt_wo_video', 'lvt_wo_video', 'uids_wo_video'
    )

    # Left join: UUIDs that only have excluded rows keep their totals.
    totals.join(
        without_video, by=('UUID', 'url', 'fielddate'), type='left'
    ).put('home/videolog/VH_FACTORS/vh_factors_table', append=True)

    job.run()


def main():
    """Parse ``--start_date`` / ``--end_date`` and process every day in that range.

    Both arguments are required strings. They are handed to
    ``pd.date_range`` and ``process_data`` is called once per resulting
    timestamp, with the first ten characters of its string form.
    """
    arg_parser = argparse.ArgumentParser()
    for option in ('--start_date', '--end_date'):
        arg_parser.add_argument(option, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        # Keep only the leading 10 characters of the timestamp's string form.
        process_data(str(day)[:10])


# Run the date-range processing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
