#-*- coding: UTF-8 -*-
import nile
import argparse
import time
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record,
    cli,
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from copy import deepcopy
import urllib
from datetime import datetime, timedelta
import os

def calc_video_stats(groups):
    """Reducer: turn each user's session records into per-request video stats.

    Every group is parsed into a session with ``libra.ParseSession``; groups
    that fail to parse are skipped. For each request in the session that is a
    ``TVideoRequestProperties``, one record is yielded for every combination
    of the request's own ``path`` / ``source`` / ``ui`` / ``utm_source`` /
    ``kids`` value and the ``"_total_"`` roll-up value, i.e. 2**5 = 32 records
    per request.

    Args:
        groups: iterable of ``(key, records)`` pairs. ``key.key`` is used as
            the uid and ``records`` is handed to ``libra.ParseSession``.

    Yields:
        Record with the fields ``uid``, ``ui``, ``fielddate``, ``source``,
        ``path``, ``utm_source``, ``kids``, ``tvt``, ``lvt``,
        ``deep_watches`` and ``serpids``.
    """
    # Imported inside the function, as the original job did -- presumably so
    # that they are resolved on the worker that executes the reducer.
    import urlparse
    import libra
    from itertools import product
    from math import log
    import uatraits

    # A view only contributes to ``lvt`` when its view time exceeds this value.
    REFUSE_THRESHOLD = 30
    TOTAL = "_total_"
    detector = uatraits.detector('/usr/share/uatraits/browser.xml')

    def first_value(params, name, default):
        """Return the first value of CGI parameter *name*, or *default*."""
        values = params.get(name)
        return values[0] if values else default

    for key, recs in groups:
        uid = key.key

        try:
            session = libra.ParseSession(recs, './blockstat.dict')
        except Exception:
            # Deliberate best effort: a session that cannot be parsed is
            # dropped instead of failing the whole reduce operation.
            continue

        for r in session:
            if not r.IsA("TVideoRequestProperties"):
                continue

            ### Platform calc
            # Reset on every request. Previously ``ui`` was left unassigned
            # when no UI type matched, which either raised UnboundLocalError
            # or silently reused the platform of the previous request.
            ui = "no_ui"
            if r.IsA('TDesktopUIProperties'):
                ui = 'desktop'
            elif r.IsA('TTouchUIProperties'):
                ui = 'touch'
            elif r.IsA('TPadUIProperties'):
                ui = 'pad'
            elif r.IsA('TMobileAppUIProperties'):
                ui = 'app'

            # A TV user agent overrides whatever the UI type said above.
            traits = detector.detect(str(r.UserAgent))
            if traits.get('isTV'):
                ui = 'tv'
                url = urllib.unquote(r.FullRequest)
                # 'video/tvapp' must be tested first: it contains 'video/tv'.
                if 'video/tvapp' in url:
                    ui = 'video/tvapp'
                elif 'video/tv' in url:
                    ui = 'video/tv'

            ### Source and path params calc
            query = urlparse.urlparse(r.FullRequest).query
            cgi_params = urlparse.parse_qs(query)
            path = first_value(cgi_params, "path", "no_path")
            source = first_value(cgi_params, "source", "no_source")
            utm_source = first_value(cgi_params, "utm_source", "no_utm_source")
            # Any non-blank ``kids`` value flags the request.
            kids = 1 if cgi_params.get("kids") else 0

            ### TVT stats calc
            is_first_page = r.SerpID == r.ReqID
            is_search_request = (not r.IsA("TRelatedVideoRequestProperties")
                                 and not r.IsA("TVideoPortalRequestProperties"))

            tvt = 0
            lvt = 0
            deep_watches = 0

            for block in r.GetMainBlocks():
                result = block.GetMainResult()
                view_time = 0
                dur = r.FindVideoDurationInfo(result)
                if dur:
                    # Playing time is capped by the video's own duration.
                    view_time = min(dur.PlayingDuration, dur.Duration)
                if r.FindVideoHeartbeat(result, "SINGLE"):
                    deep_watches += 1
                hb = r.FindVideoHeartbeat(result, "ANY")
                if hb:
                    # Take the larger of the duration-based and
                    # heartbeat-based estimates.
                    view_time = max(view_time, hb.Ticks)
                tvt += view_time
                if view_time > REFUSE_THRESHOLD:
                    # view_time > 30 here, so the log argument is > 5.
                    lvt += log(view_time - 25.)

            ### Yielding stats
            # NOTE(review): fromtimestamp() uses the worker's local timezone.
            fielddate = datetime.fromtimestamp(r.Timestamp).strftime('%Y-%m-%d')
            serpids = is_first_page and is_search_request

            # product() varies the last dimension fastest, which matches the
            # order of the nested loops it replaces.
            variants = product(
                (path, TOTAL),
                (source, TOTAL),
                (ui, TOTAL),
                (utm_source, TOTAL),
                (kids, TOTAL),
            )
            for path_v, source_v, ui_v, utm_source_v, kids_v in variants:
                yield Record(uid=uid, ui=ui_v,
                             fielddate=fielddate,
                             source=source_v,
                             path=path_v,
                             utm_source=utm_source_v,
                             kids=kids_v,
                             tvt=tvt,
                             lvt=lvt,
                             deep_watches=deep_watches,
                             serpids=serpids)

def push_to_stat_new(report_table, scale, report, stat_username, stat_token):
    """Publish an existing table into a Statface report.

    A ``StatfaceClient`` is created for the upload proxy with the given
    credentials, then ``remote_publish`` is called synchronously
    (``async_mode=False``) without uploading the report config.

    Args:
        report_table: value passed as ``table_path`` to ``remote_publish``.
        scale: report scale passed to ``StatfaceReport.scale``.
        report: report path passed to ``StatfaceReport.path``.
        stat_username: Statface user name.
        stat_token: Statface token.
    """
    statface = ns.StatfaceClient(
        proxy='upload.stat.yandex-team.ru',
        username=stat_username,
        token=stat_token,
    )

    target = ns.StatfaceReport().path(report).scale(scale).client(statface)
    target.remote_publish(
        proxy='hahn',
        table_path=report_table,
        async_mode=False,
        upload_config=False,
    )

@cli.statinfra_job
def make_job(job, options, statface_client):
    """Assemble the daily "Video source stat" job.

    Pipeline: read the clean video user sessions for ``options.dates[0]``,
    reduce them with ``calc_video_stats``, sum the metrics per uid and slice,
    then aggregate per slice (counting uids), keep slices with more than 10
    uids and publish the result to the Statface report.

    Args:
        job: job object to configure and attach the pipeline to.
        options: CLI options; only ``options.dates[0]`` is read.
        statface_client: client attached to the target Statface report.

    Returns:
        The configured job.
    """
    day = options.dates[0]

    target_report = (
        ns.StatfaceReport()
        .path('Video.All/Video source stat')
        .scale('daily')
        .client(statface_client)
    )

    job = job.env(
        yt_spec_defaults=dict(
            pool_trees=["physical"],
            tentative_pool_trees=["cloud"],
        ),
        templates=dict(
            tmp_root='//tmp',
            title='VideoStatsByCgiParams',
        ),
    )

    sessions = job.table('user_sessions/pub/video/daily/' + day + '/clean')

    # Stage 1: one group per session key, ordered by subkey, fed to the reducer.
    ordered_sessions = sessions.groupby('key').sort('subkey')
    reducer_files = [
        nile.files.StatboxDict('blockstat.dict', use_latest=True),
        nile.files.RemoteFile('statbox/resources/libra.so'),
        nile.files.StatboxWheel('yandex_baobab_api'),
    ]
    per_request = ordered_sessions.reduce(
        calc_video_stats,
        files=reducer_files,
        memory_limit=4000,
    )

    # Stage 2: collapse to one row per (uid, slice) so that the count in the
    # next stage counts uids rather than requests.
    slice_keys = ('ui', 'source', 'path', 'utm_source', 'kids')
    per_user = per_request.groupby('uid', *slice_keys).aggregate(
        fielddate=na.any('fielddate'),
        tvt=na.sum('tvt'),
        lvt=na.sum('lvt'),
        deep_watches=na.sum('deep_watches'),
        serpids=na.sum('serpids'),
    )

    # Stage 3: one row per slice.
    per_slice = per_user.groupby(*slice_keys).aggregate(
        uids=na.count(),
        tvt=na.sum('tvt'),
        lvt=na.sum('lvt'),
        deep_watches=na.sum('deep_watches'),
        serpids=na.sum('serpids'),
        fielddate=na.any('fielddate'),
    )

    # Only slices with more than 10 uids are published.
    per_slice.filter(nf.custom(lambda uids: uids > 10, 'uids')) \
             .publish(target_report, remote_mode=True)

    return job

# Script entry point: hand control to the nile CLI runner when this file is
# executed directly (presumably it dispatches to the job registered with
# @cli.statinfra_job above -- confirm against the nile CLI docs).
if __name__ == '__main__':
    cli.run()
