from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile
from datetime import datetime

def parse_us(groups):
    """Reduce grouped user-session rows into per-interaction records.

    Intended to be passed to a ``groupby(...).reduce(...)`` call: ``groups``
    is iterated as ``(key, records)`` pairs. The records of each group are
    parsed with ``libra.ParseSession`` and every request of the parsed
    session is inspected:

    * Video requests (``TYandexVideoRequest`` / ``TTouchYandexVideoRequest``)
      are kept when the ``UPPER.VideoExtraItems.object_id`` search prop
      equals the target entity id or ``REPORT.winner_ids`` contains the
      target winner id. For each main block whose main result is a
      ``TVideoResult``, the value
      ``max(min(PlayingDuration, Duration), heartbeat Ticks)`` is computed
      (missing duration info / heartbeat counts as 0); if it exceeds the
      threshold a record with ``service='video'``, ``tvt=<that value>`` and
      ``clicks=0`` is yielded.
    * Web requests (``TYandexWebRequest`` / ``TTouchYandexWebRequest``) are
      kept when ``UPPER.EntitySearch.Ontoid`` equals the target entity id.
      For each click whose URL does not contain ``yandex.ru`` and whose
      ``DwellTimeOnService`` exceeds the threshold, a record with
      ``service='web'``, ``tvt=0`` and ``clicks=1`` is yielded.

    Groups that fail to parse are skipped without any output.

    Args:
        groups: iterable of ``(key, records)`` pairs.

    Yields:
        Record with fields ``service``, ``url``, ``tvt``, ``clicks`` and
        ``q`` (the request's ``Query``).
    """
    # Imported lazily inside the reducer; presumably because the module is
    # only available on the worker where the job runs -- TODO confirm.
    import libra

    # Identifiers of the tracked entity and the shared cut-off. Previously
    # these literals were repeated inline in both branches.
    entity_id = 'ruw5721835'
    winner_id = 'kin016227'
    threshold = 120  # presumably seconds -- TODO confirm units

    for _key, recs in groups:
        try:
            session = libra.ParseSession(recs, './blockstat.dict')
        except Exception:
            # Deliberate best-effort: one unparsable session must not fail
            # the whole reduce operation, so the group is dropped.
            continue

        for r in session:
            if r.IsA('TYandexVideoRequest') or r.IsA('TTouchYandexVideoRequest'):
                spv = r.SearchPropsValues

                # Skip requests that mention the entity neither as the
                # extra-items object nor among the winner ids.
                if (str(spv.get('UPPER.VideoExtraItems.object_id')) != entity_id
                        and winner_id not in str(spv.get('REPORT.winner_ids'))):
                    continue

                for block in r.GetMainBlocks():
                    result = block.GetMainResult()
                    if not result.IsA("TVideoResult"):
                        continue

                    duration = r.FindVideoDurationInfo(result)
                    if duration:
                        # Take the smaller of the two duration values.
                        watched = min(duration.PlayingDuration, duration.Duration)
                    else:
                        watched = 0

                    heartbeat = r.FindVideoHeartbeat(result, 'ANY')
                    ticks = heartbeat.Ticks if heartbeat else 0

                    res_tvt = max(watched, ticks)
                    if res_tvt > threshold:
                        yield Record(
                            service='video',
                            url=str(result.Url),
                            tvt=res_tvt,
                            clicks=0,
                            q=r.Query,
                        )

            elif r.IsA('TYandexWebRequest') or r.IsA('TTouchYandexWebRequest'):
                spv = r.SearchPropsValues

                if str(spv.get('UPPER.EntitySearch.Ontoid')) != entity_id:
                    continue

                for click in r.GetClicks():
                    url = str(click.Url)
                    # Clicks that stay on yandex.ru are not counted.
                    if 'yandex.ru' in url:
                        continue
                    if click.DwellTimeOnService > threshold:
                        yield Record(
                            service='web',
                            url=url,
                            tvt=0,
                            clicks=1,
                            q=r.Query,
                        )

def main():
    """Aggregate every not-yet-processed user-session table.

    Steps:

    1. Take the greatest table name under ``<job_root>/AGGR`` and use its
       last 10 characters as the timestamp of the latest finished run.
    2. For each entry under the user-sessions prefix whose name compares
       greater (string comparison) than that timestamp and that has a
       ``clean`` child, run a job which
       * reduces the table with ``parse_us`` (grouped by ``key``, sorted by
         ``subkey``) into ``<job_root>/ALL/all_reqs_<ts>``;
       * counts records per ``url`` into
         ``<job_root>/AGGR/aggr_urls_<ts>``, sorted by the count.
    """
    CASTLE_ROCK_PREFIX = 'home/videolog/CASTLE_ROCK'
    USER_SESSIONS_PREFIX = 'user_sessions/pub/search/fast'
    cluster = clusters.yt.Hahn().env(
        templates=dict(job_root=CASTLE_ROCK_PREFIX),
        parallel_operations_limit=10,
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"],
        ),
    )

    # An empty AGGR directory (e.g. the very first run) used to raise
    # IndexError; fall back to '' so that every available table qualifies.
    aggr_tables = list(cluster.driver.list(CASTLE_ROCK_PREFIX + '/AGGR'))
    last_calculated_ts = max(aggr_tables)[-10:] if aggr_tables else ''

    # Process oldest first: the newest AGGR table is the resume marker, so
    # an interrupted run must not leave unprocessed tables below it.
    # NOTE(review): an entry without 'clean' is still skipped for good once
    # a later entry has been aggregated -- confirm this is acceptable.
    for ts in sorted(cluster.driver.list(USER_SESSIONS_PREFIX)):
        source_path = USER_SESSIONS_PREFIX + '/' + ts + '/clean'
        if not (ts > last_calculated_ts and cluster.driver.exists(source_path)):
            continue

        job = cluster.job()
        us = job.table(source_path)
        reqs = us.groupby('key').sort('subkey').reduce(
            parse_us,
            files=[
                nile.files.RemoteFile('statbox/statbox-dict-last/blockstat.dict'),
                nile.files.RemoteFile('statbox/resources/libra.so'),
            ],
            memory_limit=4000,
        ).put('$job_root/ALL/all_reqs_' + ts)
        reqs.groupby('url').aggregate(
            lines=na.count(),
        ).sort('lines').put('$job_root/AGGR/aggr_urls_' + ts)
        job.run()

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
