#-*-coding: utf8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    cli,
    with_hints,
    clusters,
    Record
)

import nile
import math

@with_hints(output_schema=dict(
    uid=str, tvt=float, lvt=float, category=str))
def parse_us(groups):
    """Reduce user sessions into per-request video view-time records.

    For every ``(key, recs)`` group the records are parsed with libra.
    Groups that libra fails to parse are skipped. Within a parsed session,
    only requests that are ``TYandexVideoRequest`` or
    ``TYandexRelatedVideoRequest``, have ``ServiceDomRegion == 'ru'`` and
    whose browser is ``YandexBrowser`` are kept.

    For each kept request two records are yielded with the same numbers:
    one under the request's category ('film/series', 'porn' or 'other')
    and one under the '_total_' category.

    Fields of a yielded record:
        uid: ``key.key`` of the group.
        tvt: sum of view times over the request's main-block video results,
            where a view time is the larger of
            ``min(PlayingDuration, Duration)`` and the heartbeat ``Ticks``.
            Presumably in seconds (the caller divides by 3600) -- TODO confirm.
        lvt: sum of ``log(view_time - 25)`` over results whose view time
            exceeds 30.
        category: see above.
    """
    # libra is not importable at module level on the client side (the job
    # ships libra.so alongside the reducer -- see the ``files`` argument of
    # the reduce call), so import it here, once per call rather than once
    # per group.
    import libra

    long_view_threshold = 30   # only views longer than this contribute to lvt
    log_offset = 25.           # subtracted from the view time before the log

    for key, recs in groups:
        try:
            session = libra.ParseSession(recs, './blockstat.dict')
        except Exception:
            # Deliberate best effort: a session libra cannot parse is dropped
            # instead of failing the whole job.
            continue

        # SerpIDs already recognised as film SERPs via REPORT.entity; later
        # requests in the same session with such a SerpID inherit the category.
        film_serp_ids = set()

        for req in session:
            if not (req.IsA('TYandexVideoRequest')
                    or req.IsA('TYandexRelatedVideoRequest')):
                continue
            if req.ServiceDomRegion != 'ru':
                continue
            if req.GetBrowser()[0] != 'YandexBrowser':
                continue

            # The order of these checks matters: the first match wins.
            if req.RelevValues.get('vserial', 0) > 0:
                category = 'film/series'
            elif req.SerpID in film_serp_ids:
                category = 'film/series'
            elif 'Film' in req.SearchPropsValues.get('REPORT.entity', ''):
                category = 'film/series'
                film_serp_ids.add(req.SerpID)
            elif req.SearchPropsValues.get('VIDEO.VideoPorno.vidprn', '') == 'ipq1':
                category = 'porn'
            else:
                category = 'other'

            # Start from floats so that requests without any viewed video
            # still produce values matching the declared float schema.
            tvt, lvt = 0.0, 0.0
            for block in req.GetMainBlocks():
                result = block.GetMainResult()
                if not result.IsA("TVideoResult"):
                    continue

                view_time = 0
                dur = req.FindVideoDurationInfo(result)
                if dur:
                    # Playing time is capped by the duration of the video.
                    view_time = min(dur.PlayingDuration, dur.Duration)
                hb = req.FindVideoHeartbeat(result, "ANY")
                if hb:
                    view_time = max(view_time, hb.Ticks)

                tvt += view_time
                if view_time > long_view_threshold:
                    lvt += math.log(view_time - log_offset)

            yield Record(uid=key.key, tvt=tvt, lvt=lvt, category=category)
            yield Record(uid=key.key, tvt=tvt, lvt=lvt, category='_total_')

@cli.statinfra_job
def make_job(job, options, statface_client):
    """Build the job that publishes per-category video view-time statistics.

    For every date in ``options.dates`` the daily clean search sessions are
    reduced with ``parse_us``, aggregated by category and published to the
    'Video/Others/compareTVT/TVTbyHosts' daily Statface report.

    Returns the configured job.
    """
    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(job_root='//home/videolog/liza-p/mma-2667/regular_calc2'))

    report = ns.StatfaceReport()
    report = report.path('Video/Others/compareTVT/TVTbyHosts')
    report = report.scale('daily')
    report = report.client(statface_client)

    for date in options.dates:
        sessions = job.table('//user_sessions/pub/search/daily/%s/clean' % date)

        # Each session (grouped by 'key', ordered by 'subkey') goes through
        # the libra-based reducer; the dictionary and the libra binary are
        # attached to the operation as files.
        per_request = sessions.groupby('key').sort('subkey').reduce(
            parse_us,
            files=[nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
                   nile.files.RemoteFile('statbox/resources/libra.so')],
            memory_limit=4000)
        # Debugging aid: per_request can be dumped with
        # .put('$job_root/%s user_sessions(prepared records)' % date)

        per_category = per_request.groupby('category').aggregate(
            tvt=na.sum('tvt'),
            lvt=na.sum('lvt'),
            uids=na.count_distinct('uid'))

        # tvt is rescaled by 1/3600; the remaining fields are constant
        # report dimensions.
        stats = per_category.project(
            ne.all(exclude='tvt'),
            tvt=ne.custom(lambda tvt: tvt/3600.0).with_type(float),
            fielddate=ne.const(date).with_type(str),
            host=ne.const('yandex.ru/video').with_type(str),
            platform=ne.const('desktop').with_type(str))
        # Debugging aid: stats can be dumped with
        # .put('$job_root/%s user_sessions (stats)' % date)

        stats.publish(report)

    return job

# Script entry point: run the nile CLI only when executed directly,
# not when the module is imported.
if '__main__' == __name__:
    cli.run()

