from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf
import argparse
import nile
import datetime
import uatraits, json, re
import urllib, urlparse
import math,cgi
import pandas as pd
from itertools import product
import sys
import os


def parse_us(groups, wiz_doubles, vh_clicks, open_init_delta, player_event):
    """Reduce grouped user-session records into four output streams.

    Each group is parsed with ``libra.ParseSession``; groups that fail to
    parse are skipped silently. Every request of a recognised type is then
    inspected and zero or more ``Record`` objects are pushed to the output
    callables.

    Args:
        groups: iterable of ``(key, recs)`` pairs; ``key.key`` is used as uid.
        wiz_doubles: output callable. Receives two records (real ``ui`` and
            ``ui='_total_'``) for every showcase thumb that carries a filmId
            on page 0 of a 'ru' desktop/touch/pad web request.
        vh_clicks: output callable. Receives one record per page-0 request
            that has an object id, with accumulated watch time, click
            counters and the URL sets.
        open_init_delta: output callable. Receives one record per main-block
            video URL that has exactly one open-player click and exactly one
            init event (desktop/touch video platforms, 'ru' only).
        player_event: output callable. Receives one record per main-block
            video URL with the player-event flags ('ru' only).

    Returns:
        None. All results are delivered through the output callables.
    """
    # Imported inside the function body rather than at module level.
    # NOTE(review): presumably because libra.so is only shipped to the job
    # through `files=` in process_us — confirm.
    import libra

    for key,recs in groups:
        uid = key.key

        try:
            s = libra.ParseSession(recs, "blockstat.dict", None, ["vid", "web"])
        except Exception as e:
            # Best-effort: an unparseable group is dropped without logging
            # (the bound exception `e` is not used).
            continue

        for r in s:
            # Map the request type to a ui label; requests of any other type
            # are ignored entirely.
            if r.IsA('TYandexVideoRequest'):
                ui = 'desktop video'
            elif r.IsA('TTouchYandexVideoRequest'):
                ui = 'touch video'
            elif r.IsA('TYandexWebRequest'):
                ui = 'desktop web'
            elif r.IsA('TTouchYandexWebRequest'):
                ui = 'touch web'
            elif r.IsA('TMobileAppYandexVideoRequest') or r.IsA('TMobileAppYandexVideoPortalRequest') or r.IsA('TMobileAppYandexRelatedVideoRequest'):
                ui = 'app video'
            elif r.IsA('TPadYandexWebRequest'):
                ui = 'pad web'
            elif r.IsA('TYandexRelatedVideoRequest'):
                ui = 'desktop related video'
            elif r.IsA('TTouchYandexRelatedVideoRequest'):
                ui = 'touch related video'
            elif r.IsA('TPadYandexVideoRequest'):
                ui = 'pad video'
            else:
                continue

            q = str(r.Query).lower()
            # Date part ('YYYY-MM-DD') of the ISO timestamp. fromtimestamp()
            # without a tz argument converts to the local timezone of the
            # machine running the job.
            date = str(datetime.datetime.fromtimestamp(r.Timestamp).isoformat()).split('T')[0]
            spv = r.SearchPropsValues
            reqid = r.ReqID

            # --- wiz_doubles: video showcase thumbs on the first page of
            # 'ru' web requests.
            if r.ServiceDomRegion == 'ru' and ui in ['desktop web', 'touch web', 'pad web'] and r.PageNo == 0:
                for bl in r.GetBSBlocks():
                    p = bl.Path
                    vv = bl.GetVars()
                    if 'snippet/video/showcase/item/thumb' in p:
                        # Defaults mean "not found"; only thumbs with a
                        # filmId are emitted.
                        filmid = '-'
                        index = -1
                        # NOTE(review): each v is presumably a (name, value)
                        # pair, so `in v` is a membership test on the pair
                        # and v[1] is the value — confirm against libra.
                        for v in vv:
                            if '-item' in v:
                                index = int(v[1])
                            if '-filmId' in v:
                                filmid = v[1]

                        if filmid != '-':
                            # Emitted twice: once under the real ui and once
                            # under the '_total_' pseudo-ui.
                            total_ui = '_total_'
                            wiz_doubles(Record(uid=uid,ui=ui,filmid=filmid,thumb=index,
                                                     reqid=r.ReqID,date=date)
                                             )
                            wiz_doubles(Record(uid=uid,ui=total_ui,filmid=filmid,thumb=index,
                                                     reqid=r.ReqID,date=date)
                                             )
            # 'pad web' requests contribute only to wiz_doubles.
            if ui == 'pad web':
                continue

            # The object id comes from a different search prop depending on
            # whether the request is a video or a web one.
            if 'video' in ui:
                object_id = spv.get('UPPER.VideoExtraItems.object_id')
            else:
                object_id = spv.get('UPPER.EntitySearch.Ontoid')

            # Only desktop/touch video uis get a named platform; web, app and
            # pad video uis all fall into 'other'.
            if ui in ['desktop video', 'desktop related video']:
                platform = 'desktop'
            elif ui in ['touch video', 'touch related video']:
                platform = 'touch'
            else:
                platform = 'other'

            # Per-request accumulators, split into "vh" (frontend.vh /
            # kinopoisk / yandex video) and "other".
            vh_tvt = 0
            other_tvt = 0
            vid_clicks = 0
            other_clicks = 0
            vh_urls = set()
            other_urls = set()

            # Per-URL state for video results found in the main blocks.
            main_blocks = {}
            player_events = {}

            for bl in r.GetMainBlocks():
                result = bl.GetMainResult()
                if result.IsA("TVideoResult"):

                    url = str(result.Url)
                    # A repeated URL resets the state collected so far for it.
                    main_blocks[url] = {}
                    player_events[url] = {}

                    player_id = result.PlayerId
                    main_blocks[url]['player_id'] = player_id
                    player_events[url]['player_id'] = player_id
                    player_events[url]['has_open'] = 0
                    player_events[url]['has_inited'] = 0
                    player_events[url]['has_started'] = 0
                    player_events[url]['has_adShown'] = 0
                    player_events[url]['has_adEnd'] = 0
                    player_events[url]['has_paused'] = 0
                    player_events[url]['has_resumed'] = 0
                    player_events[url]['has_rewound'] = 0
                    player_events[url]['has_ended'] = 0

                    main_blocks[url]['open_player_ts'] = []
                    main_blocks[url]['init_player_ts'] = []
                    main_blocks[url]['start_player_ts'] = []

                    for cl in bl.GetClicks():
                        # Both path representations are concatenated so the
                        # substring test below covers either of them.
                        p = cl.ConvertedPath
                        pp = cl.Path
                        p = p + pp

                        if 'open-player' in p:
                            player_events[url]['has_open'] = 1
                            main_blocks[url]['open_player_ts'].append(cl.Timestamp)

                    # dt: the smaller of playing duration and duration, or 0
                    # when no duration info is found.
                    duration = r.FindVideoDurationInfo(result)
                    if (duration):
                        dt = min(duration.PlayingDuration, duration.Duration)
                        dur = max(duration.PlayingDuration, duration.Duration)
                    else:
                        dt = 0
                        dur = 0

                    # ht: heartbeat ticks, or 0 when no heartbeat is found.
                    heartbeat = r.FindVideoHeartbeat(result, 'ANY')
                    if (heartbeat):
                        ht = heartbeat.Ticks
                    else:
                        ht = 0

                    # NOTE(review): `dur` is overwritten here and never read
                    # anywhere in this function.
                    dur = result.Duration

                    # Watch time for this result is the larger of the two
                    # estimates; lvt is its natural log, kept only above the
                    # threshold of 30 (unit not visible here — presumably
                    # seconds, confirm).
                    res_tvt = max(dt,ht)
                    lvt = math.log(res_tvt,math.e) if res_tvt > 30 else 0

                    main_blocks[url]['tvt'] = res_tvt
                    player_events[url]['tvt'] = res_tvt
                    player_events[url]['lvt'] = lvt

                    # A "view" uses the same > 30 threshold as lvt.
                    if res_tvt > 30:
                        view = 1
                    else:
                        view = 0

                    main_blocks[url]['view'] = view
                    player_events[url]['view'] = view

                    if 'frontend.vh' in url or 'kinopoisk.ru' in url:
                        vh_tvt += res_tvt
                        vh_urls.add(url)
                    else:
                        other_tvt += res_tvt
                        other_urls.add(url)

                elif result.IsA('TWebResult'):
                    # Clicks are classified by destination URL. Clicks to
                    # yandex.ru pages other than yandex.ru/video increment
                    # neither counter.
                    for cl in bl.GetClicks():
                        url = str(cl.Url)
                        if 'yandex.ru/video' in url or 'kinopoisk.ru' in url:
                            vid_clicks += 1
                            vh_urls.add(url)
                        elif not 'yandex.ru' in url:
                            other_clicks += 1
                            other_urls.add(url)

                elif result.IsA('TBlenderWizardResult') or result.IsA('TWizardResult'):
                    # Wizards other than "tv": clicks are counted, but unlike
                    # the TWebResult branch above the URLs are not recorded.
                    if result.Name != "tv":
                        for cl in bl.GetClicks():
                            url = str(cl.Url)
                            if 'yandex.ru/video' in url or 'kinopoisk.ru' in url:
                                vid_clicks += 1
                            elif not 'yandex.ru' in url:
                                other_clicks += 1

            # Parallel blocks feed the same totals, but create no per-URL
            # entries in main_blocks / player_events.
            for bl in r.GetParallelBlocks():
                result = bl.GetMainResult()
                if result.IsA("TVideoResult"):

                    url = str(result.Url)

                    duration = r.FindVideoDurationInfo(result)
                    if (duration):
                        dt = min(duration.PlayingDuration, duration.Duration)
                        dur = max(duration.PlayingDuration, duration.Duration)
                    else:
                        dt = 0
                        dur = 0

                    heartbeat = r.FindVideoHeartbeat(result, 'ANY')
                    if (heartbeat):
                        ht = heartbeat.Ticks
                    else:
                        ht = 0

                    dur = result.Duration

                    res_tvt = max(dt,ht)
                    # NOTE(review): lvt is computed but not used for parallel
                    # blocks.
                    lvt = math.log(res_tvt,math.e) if res_tvt > 30 else 0

                    if 'frontend.vh' in url or 'kinopoisk.ru' in url:
                        vh_tvt += res_tvt
                        vh_urls.add(url)
                    else:
                        other_tvt += res_tvt
                        other_urls.add(url)

                elif result.IsA('TWebResult'):
                    # NOTE(review): unlike the main-block TWebResult branch,
                    # clicked URLs are not added to vh_urls / other_urls
                    # here — confirm this asymmetry is intended.
                    for cl in bl.GetClicks():
                        url = str(cl.Url)
                        if 'yandex.ru/video' in url or 'kinopoisk.ru' in url:
                            vid_clicks += 1
                        elif not 'yandex.ru' in url:
                            other_clicks += 1

                elif result.IsA('TBlenderWizardResult') or result.IsA('TWizardResult'):
                    if result.Name != "tv":
                        for cl in bl.GetClicks():
                            url = str(cl.Url)
                            if 'yandex.ru/video' in url or 'kinopoisk.ru' in url:
                                vid_clicks += 1
                            elif not 'yandex.ru' in url:
                                other_clicks += 1

            # At most one extra video click per request when a TV online
            # player heartbeat tech event is present (stops at the first).
            if r.IsA("TWebRequestProperties"):
                for tech_event in r.GetYandexTechEvents():
                    if tech_event.IsA("TTVOnlinePlayerEvent"):
                        if tech_event.Path == "player-events.heartbeat":
                            vid_clicks += 1
                            break

            # Player tech events ('ru' only) set the has_* flags of the
            # matching main-block video URL.
            if r.ServiceDomRegion == 'ru':
                for tech in r.GetYandexTechEvents():
                    if not tech.IsA('TVideoPlayerEvent'):
                        continue

                    p = tech.Path
                    url = str(tech.Url)

                    # Events for URLs that are not main-block video results
                    # are ignored.
                    if url not in main_blocks:
                        continue

                    # The checks below are independent substring tests, so a
                    # single path may set several flags. Timestamps are kept
                    # only for the desktop/touch video platforms.
                    if 'init' in p:
                        player_events[url]['has_inited'] = 1
                        if platform != 'other':
                            main_blocks[url]['init_player_ts'].append(tech.Timestamp)

                    if 'start' in p:
                        player_events[url]['has_started'] = 1
                        if platform != 'other':
                            main_blocks[url]['start_player_ts'].append(tech.Timestamp)

                    if 'adShown' in p:
                        player_events[url]['has_adShown'] = 1
                    if 'adEnd' in p:
                        player_events[url]['has_adEnd'] = 1
                    if 'paused' in p:
                        player_events[url]['has_paused'] = 1
                    if 'resumed' in p:
                        player_events[url]['has_resumed'] = 1
                    if 'rewound' in p:
                        player_events[url]['has_rewound'] = 1
                    if 'ended' in p:
                        player_events[url]['has_ended'] = 1

            # --- vh_clicks: one record per first-page request with an
            # object id, for the listed uis only.
            if object_id and ui in ['desktop video', 'touch video', 'desktop web', 'touch web', 'app video'] and r.PageNo == 0:

                vh_clicks(Record(ui=ui, q=q, object_id=object_id, vh_tvt=vh_tvt, other_tvt=other_tvt,
                                 vid_clicks=vid_clicks, other_clicks=other_clicks, vh_urls=list(vh_urls),
                                 other_urls=list(other_urls), date=date
                                )
                         )
            # --- open_init_delta: timing between the open-player click and
            # the player init event.
            if platform != 'other' and r.ServiceDomRegion == 'ru':

                # dict.iteritems() is the Python 2 dict API.
                for url, url_dict in main_blocks.iteritems():
                    has_start = 'no'
                    autoplay = 0
                    start_delta = -1
                    delta = -1
                    # Only URLs with exactly one init and exactly one open
                    # timestamp are emitted.
                    if (len(url_dict['init_player_ts']) == 1) and (len(url_dict['open_player_ts']) == 1):
                        # delta stays -1 when init precedes the open click.
                        if url_dict['init_player_ts'][0] >= url_dict['open_player_ts'][0]:
                            delta = url_dict['init_player_ts'][0] - url_dict['open_player_ts'][0]
                        if url_dict['start_player_ts']:
                            has_start = 'yes'
                            # A first start within 2 timestamp units of init
                            # is flagged as autoplay.
                            if url_dict['start_player_ts'][0] - url_dict['init_player_ts'][0] <=2:
                                autoplay = 1
                            start_delta = url_dict['start_player_ts'][0] - url_dict['init_player_ts'][0]

                        # NOTE(review): the whole main_blocks dict (all URLs
                        # of the request) is embedded in every record —
                        # confirm this is intended and not a debugging
                        # leftover.
                        open_init_delta(Record(uid = uid, reqid = reqid, platform = platform, delta = delta,
                                               tvt = url_dict['tvt'], view = url_dict['view'],
                                               player_id = url_dict['player_id'], url = url, date = date,
                                               autoplay = autoplay, has_start = has_start,
                                               start_delta = start_delta, main_blocks=main_blocks
                                              )
                                        )

            # --- player_event: one record per main-block video URL with the
            # event flags, for every platform (including 'other').
            if r.ServiceDomRegion == 'ru':
                for url, url_dict in player_events.iteritems():
                    player_event(Record(uid = uid, reqid = reqid, platform = platform, tvt = url_dict['tvt'],
                                         lvt = url_dict['lvt'], view = url_dict['view'],
                                         player_id = url_dict['player_id'], url = url, date = date,
                                         has_open = url_dict['has_open'], has_inited = url_dict['has_inited'],
                                         has_started = url_dict['has_started'],
                                         has_adShown = url_dict['has_adShown'],
                                         has_adEnd = url_dict['has_adEnd'],
                                         has_paused = url_dict['has_paused'],
                                         has_resumed = url_dict['has_resumed'],
                                         has_rewound = url_dict['has_rewound'],
                                         has_ended = url_dict['has_ended']
                                        )
                                 )


def process_us(date):
    """Build and run the Nile job that parses one day of user sessions.

    The daily search and video user-session tables for ``date`` are
    concatenated, grouped by ``key``, sorted by ``subkey`` and reduced with
    ``parse_us``. Each of the four reducer outputs is sorted and written
    under ``$job_root`` with ``date`` appended to the table name.

    Args:
        date: day to process, as the string used in the table paths.
    """
    hahn = clusters.yt.Hahn()
    cluster = hahn.env(
        templates=dict(job_root='home/videolog/vika-pavlova/one_us_parse_for_all_reports'),
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            use_default_tentative_pool_trees=True
            # Alternative kept disabled: tentative_pool_trees=["cloud"]
        ),
        parallel_operations_limit=10
    )

    job = cluster.job()

    # Input tables, concatenated in this order: search sessions first,
    # then video sessions.
    search_sessions = job.table('user_sessions/pub/search/daily/' + date + '/clean')
    video_sessions = job.table('user_sessions/pub/video/daily/' + date + '/clean')
    sessions = job.concat(search_sessions, video_sessions)

    grouped = sessions.groupby('key').sort('subkey')

    # Files shipped with the reduce operation (parse_us passes
    # "blockstat.dict" to libra.ParseSession).
    reducer_files = [
        nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
        nile.files.RemoteFile('statbox/resources/libra.so'),
        nile.files.StatboxWheel('yandex_baobab_api'),
    ]
    outputs = grouped.reduce(
        parse_us,
        files=reducer_files,
        memory_limit=4000,
        intensity='data'
    )
    wiz_doubles, vh_clicks, open_init_delta, player_event = outputs

    # (stream, sort key, destination prefix) for each reducer output.
    destinations = (
        (wiz_doubles, 'uid', '$job_root/2205_wiz_doubles_'),
        (vh_clicks, 'object_id', '$job_root/2082_2184_vh_'),
        (open_init_delta, 'player_id', '$job_root/2459_open_init_'),
        (player_event, 'player_id', '$job_root/3996_player_events_'),
    )
    for stream, sort_key, prefix in destinations:
        stream.sort(sort_key).put(prefix + date)

    job.run()



def main():
    """Run ``process_us`` for every day of the requested date range.

    Reads the required ``--start_date`` and ``--end_date`` command-line
    options and processes each day from start to end (both inclusive) in
    order, passing the day as a 'YYYY-MM-DD' string.
    """
    arg_parser = argparse.ArgumentParser()
    for flag in ('--start_date', '--end_date'):
        arg_parser.add_argument(flag, type=str, required=True)
    options = arg_parser.parse_args()

    days = pd.date_range(start=options.start_date, end=options.end_date)
    for day in days:
        process_us(day.strftime('%Y-%m-%d'))


# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
