#-*- coding: UTF-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    cli,
    with_hints,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
from qb2.api.v1 import typing as qt

import nile
import json
import sys
import urlparse
import datetime


@with_hints(output_schema=dict(yandexuid=str, url=str, viewtime=float, duration=float, src_url=str))
def parse_bs(recs):
    """Extract long video views from the ``vc`` JSON payload of each record.

    ``rec.vc`` is parsed as JSON and is expected to be a list of per-page
    entries.  Two entry layouts are handled:

    * a dict with ``url`` and ``data`` keys, where ``data`` is a list of
      dicts carrying ``total_played_duration``, ``duration`` and
      ``media_url``;
    * a dict whose first key is the page URL and whose value has a ``p``
      list of positional arrays.  Index 0 holds an id; when that id is at
      most one character long all following indices are shifted by -1.

    A duration equal to ``'live'`` is replaced with ``-1``.

    Unlike a single "last video per page" record, one record is emitted for
    EVERY video of a page, so values never leak from a previously processed
    page or record when a page has no videos.

    :param recs: iterable of records exposing ``vc`` and ``yandexuid``.
    :return: generator of ``Record(yandexuid, url, viewtime, duration,
        src_url)`` for videos whose page URL starts with ``http://`` or
        ``https://`` and whose view time is greater than ``60 * 5``
        (presumably seconds -- confirm against the log format).
    """
    min_viewtime = 60 * 5

    for rec in recs:
        try:
            parsed_vc = json.loads(rec.vc)
        except Exception:
            # Best effort on untrusted log data: an unparsable payload is
            # skipped.  Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
        if not isinstance(parsed_vc, list):
            continue

        for elem in parsed_vc:
            if not isinstance(elem, dict) or not elem:
                continue

            # (viewtime, duration, src_url) for every video of this page.
            # Rebuilt for each page so nothing stale is ever emitted.
            videos = []
            if 'data' in elem and 'url' in elem:
                url = elem['url']
                for video_info in elem['data'] or []:
                    videos.append((
                        video_info.get('total_played_duration', 0),
                        video_info.get('duration', 0),
                        video_info['media_url'],
                    ))
            else:
                # Legacy layout: the page URL is the (first) key of the dict.
                url = next(iter(elem))
                page_info = elem[url]
                if not isinstance(page_info, dict):
                    continue
                for video_info in page_info.get('p') or []:
                    # A missing id (at most one character) shifts every
                    # positional field one slot to the left.
                    cor = -1 if len(str(video_info[0])) <= 1 else 0
                    videos.append((
                        video_info[5 + cor],
                        video_info[4 + cor],
                        video_info[9 + cor],
                    ))

            if not url.startswith(("http://", "https://")):
                continue

            for viewtime, duration, src_url in videos:
                if duration == 'live':
                    duration = -1
                if viewtime > min_viewtime:
                    yield Record(yandexuid=rec.yandexuid, url=url, viewtime=viewtime,
                                 duration=duration, src_url=src_url)


@cli.statinfra_job
def make_job(job, options, statface_client):
    """Assemble the job that precalculates long video views from bar-navig-log.

    The input is ``statbox/bar-navig-log`` for a single date, or for a date
    range when the scale is ``weekly`` (the 7 days ending on the requested
    date) or ``monthly`` (from the 1st of the month up to the requested
    date).  Rows are restricted to ``yasoft == 'yabrowser'`` and region 225,
    passed through ``parse_bs``, tagged with ``fielddate`` and written under
    ``$job_root``.

    NOTE(review): ``${global.scale}`` / ``${global.logname}`` look like
    placeholders substituted by the launcher before this code runs -- confirm.

    :param job: nile job to extend.
    :param options: job options; ``options.dates[0]`` must be a
        ``%Y-%m-%d`` string.
    :param statface_client: not used by this job.
    :return: the configured job.
    """
    day_format = "%Y-%m-%d"

    job = job.env(
        yt_spec_defaults=dict(pool_trees=["physical"], tentative_pool_trees=["cloud"]),
        templates=dict(job_root='//home/videolog/itajn/pirate_watches'),
    )

    scale = "${global.scale}"
    log = "${global.logname}"

    requested_day = options.dates[0]
    period_end = datetime.datetime.strptime(requested_day, day_format)

    # Work out where the reporting period starts; None means "single day".
    if scale == "weekly":
        period_start = period_end - datetime.timedelta(days=6)
    elif scale == "monthly":
        period_start = period_end.replace(day=1)
    else:
        period_start = None

    if period_start is None:
        fielddate = requested_day
        table_path = 'statbox/bar-navig-log/%s' % requested_day
    else:
        # Ranged scales are reported under the first day of the period.
        fielddate = period_start.strftime(day_format)
        table_path = 'statbox/bar-navig-log/{%s..%s}' % (
            fielddate,
            period_end.strftime(day_format),
        )

    source = job.table(
        table_path,
        weak_schema=dict(ip_numeric=qt.Optional[qt.String]),
    )
    parsed_log = source.qb2(
        log='bar-navig-log',
        fields=[
            'yandexuid',
            se.dictitem('decoded_vc', from_='parsed_http_params'),
            # Keep only the first decoded ``vc`` value, if there is one.
            se.custom('vc', lambda values: values[0] if values else None, 'decoded_vc').with_type(qt.Json),
        ],
        filters=[
            sf.defined('vc', 'yandexuid'),
            sf.equals('yasoft', 'yabrowser'),
            sf.region_belongs([225], field='geo_id'),
        ],
    )
    watches = parsed_log.map(parse_bs, memory_limit=4000)

    output_path = '$job_root/precalculated_%s_%s_%s' % (log, scale, requested_day)
    watches.project(ne.all(), fielddate=ne.const(fielddate)).put(output_path)

    return job

# Script entry point: when executed directly, hand control to nile's CLI
# runner (presumably it dispatches to the job registered above -- confirm).
if __name__ == '__main__':
    cli.run()
