from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)

import nile
import json
import math
import argparse
import os

def parse_bs(recs):
    """Flatten the JSON ``vc`` payload of each record into per-video records.

    ``rec.vc`` is decoded with ``json.loads`` and iterated. Every entry is
    handled as one of two layouts:

    * "new" -- the entry has both a ``data`` and a ``url`` key. Each item of
      ``data`` is read by name: ``uid``, ``total_played_duration``,
      ``duration`` and ``media_url``.
    * "old" -- any other entry. Its first key is used as the page url and the
      value under that key is expected to be a mapping whose ``p`` item is a
      sequence of positional rows: guid at index 0, duration at 4, viewtime
      at 5 and source url at 9. When the first element of a row stringifies
      to at most one character, the guid is treated as missing (``None``) and
      the remaining indexes shift down by one.

    Entries that are not non-empty mappings, and old-layout entries whose
    value has no ``get`` method or no non-empty ``p`` item, are skipped
    instead of aborting the whole run.

    Args:
        recs: iterable of records exposing ``vc`` (a JSON string) and
            ``yandexuid`` attributes.

    Yields:
        Record with fields ``yandexuid``, ``guid``, ``url``, ``viewtime``,
        ``duration``, ``src_url`` and ``format`` (``'new'`` or ``'old'``).
    """
    for rec in recs:
        for elem in json.loads(rec.vc):
            # An empty or non-mapping entry has neither layout; skip it.
            if not isinstance(elem, dict) or not elem:
                continue

            if 'data' in elem and 'url' in elem:
                url = elem['url']
                for video_info in elem['data']:
                    yield Record(
                        yandexuid=rec.yandexuid,
                        guid=video_info['uid'],
                        url=url,
                        viewtime=video_info['total_played_duration'],
                        duration=video_info['duration'],
                        src_url=video_info['media_url'],
                        format='new',
                    )
                continue

            # Old layout: the entry is keyed by the page url. next(iter(...))
            # picks the same first key as keys()[0] and also works where
            # keys() returns a view rather than a list.
            url = next(iter(elem))
            try:
                rows = elem[url].get('p')
            except AttributeError:
                # The value under the url is not a mapping.
                continue
            if not rows:
                # No 'p' item (get() returned None) or nothing in it.
                continue

            for video_info in rows:
                guid = video_info[0]
                cor = 0
                if len(str(guid)) <= 1:
                    # Row has no usable guid in front: every other column
                    # sits one position earlier.
                    guid = None
                    cor = -1
                yield Record(
                    yandexuid=rec.yandexuid,
                    guid=guid,
                    url=url,
                    viewtime=video_info[5 + cor],
                    duration=video_info[4 + cor],
                    src_url=video_info[9 + cor],
                    format='old',
                )

# Command-line interface: the script takes the date to process via --date.
parser = argparse.ArgumentParser()
# The date is interpolated verbatim into every input and output table path
# used by this script, so fail fast when it is missing instead of building
# paths that end in "None".
parser.add_argument('--date', required=True,
                    help='date suffix of the bar-navig-log table to process')
args = parser.parse_args()
date = args.date

# Open a job on the Hahn YT cluster; every stream below is attached to it.
cluster = clusters.yt.Hahn()
job = cluster.job()

# Source: the bar-navig-log table for the requested date.
_navig_log = job.table('statbox/bar-navig-log/%s' % date)

# Keep only yandexuid plus the first element of the decoded_vc item taken from
# parsed_http_params, and only for rows where both are defined, yasoft equals
# 'yabrowser' and geo_id belongs to region 225 (presumably Russia -- confirm).
_vc_rows = _navig_log.qb2(
    log='bar-navig-log',
    fields=[
        'yandexuid',
        se.dictitem('decoded_vc', from_='parsed_http_params'),
        se.custom('vc', lambda x: x[0] if x else None, 'decoded_vc'),
    ],
    filters=[
        sf.defined('vc', 'yandexuid'),
        sf.equals('yasoft', 'yabrowser'),
        sf.region_belongs([225], field='geo_id'),
    ],
)

# Expand every vc payload into per-video records and store the result.
_video_rows = _vc_rows.map(parse_bs, memory_limit=4000)
prepared_recs = _video_rows.put('//home/videolog/liza-p/mma-2667/regular_calc/%s spy-log (prepared records)' % date)

# Records with a guid are collapsed to one row per (yandexuid, guid): the
# largest viewtime is kept, the other fields are taken from any row of the group.
_guid_rows = prepared_recs.filter(sf.defined('guid'))
has_guid = _guid_rows.groupby('yandexuid', 'guid').aggregate(
    url=na.any('url'),
    src_url=na.any('src_url'),
    viewtime=na.max('viewtime'),
    duration=na.any('duration'),
)

# Records without a guid cannot be grouped that way and pass through unchanged.
no_guid = prepared_recs.filter(nf.not_(sf.defined('guid')))

# Both halves are joined back into one stream and stored.
_combined = job.concat(has_guid, no_guid)
prepared_recs_2 = _combined.put('//home/videolog/liza-p/mma-2667/regular_calc/%s spy-log (prepared records 2)' % date)

# Statistics are computed only for urls starting with https://yandex.ru/video.
_video_pages = prepared_recs_2.filter(sf.startswith('url', 'https://yandex.ru/video'))

# tvt: viewtime capped by duration; rows whose duration is 'live' count as 0.
_with_tvt = _video_pages.project(
    yandexuid='yandexuid',
    tvt=ne.custom(lambda viewtime, duration: min(viewtime, duration) if duration != 'live' else 0),
)

# lvt: log(tvt - 25) for tvt of at least 30, otherwise 0.
_with_lvt = _with_tvt.project(
    ne.all(),
    lvt=ne.custom(lambda tvt: math.log(tvt-25.0) if tvt >= 30 else 0),
)

# groupby() is called without keys -- presumably this aggregates the whole
# stream into a single row; confirm against the nile documentation.
_totals = _with_lvt.groupby().aggregate(
    tvt=na.sum('tvt'),
    lvt=na.sum('lvt'),
    uids=na.count_distinct('yandexuid'),
)

# Attach the constant report columns and store the stats table.
_report_rows = _totals.project(
    ne.all(),
    fielddate=ne.const(date),
    host=ne.const('yandex.ru/video'),
)
_report_rows.put('//home/videolog/liza-p/mma-2667/regular_calc/%s spy-log (stats)' % date)

# Execute the job assembled above.
job.run()

# The Statface client is authenticated with the token from the STAT_TOKEN
# environment variable (a KeyError is raised here when it is not set).
client = ns.StatfaceProductionClient(token=os.environ['STAT_TOKEN'])

# Daily report fed from the stats table written for this date.
_report = ns.StatfaceReport().path('Video.All/Others/compareTVT2')
_report = _report.scale('daily').client(client)
_report.remote_publish(
    proxy='hahn',
    table_path='//home/videolog/liza-p/mma-2667/regular_calc/%s spy-log (stats)' % date,
)
