#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import sys
import libra
import argparse
import yt.wrapper as yt
import json


def HandleOption():
    """Build and return the command-line parser for this script.

    Recognised options (both optional):
      --server  stored as ``server``; address used as the YT proxy url.
      --bs      stored as ``blockstat``; path to the blockstat.dict file.
    """
    # (flag, destination attribute, help text, default value)
    option_specs = (
        ("--server", "server", "mapreduce server",
         'hahn.yt.yandex.net:80'),
        ("--bs", "blockstat", "path to blockstat.dict",
         '/home/itajn/serploader/blockstat.dict'),
    )

    cli = argparse.ArgumentParser()
    for flag, target, description, fallback in option_specs:
        cli.add_argument(flag,
                         dest=target,
                         help=description,
                         default=fallback,
                         required=False)
    return cli

def extract(key, recs):
    """Reducer: emit click statistics for web requests that show TV results.

    The records are parsed into a session with ``libra.ParseSession`` using
    the ``./blockstat.dict`` file from the job's working directory. For each
    ``TYandexWebRequest`` in the session one row is yielded when

      * the TV online-channel wizard is present among the main blocks, or
        ``UPPER.TVTranslations.tv_shows`` is in the request's search props,
      * and at least one main web result has ``1tv.ru`` or ``ntv.ru`` in
        its url.

    Args:
        key: reduce key of the session; not used by the computation.
        recs: records of one session, passed straight to libra.

    Yields:
        dict with the keys ``query`` (lower-cased, cut to 1000 characters),
        ``clicks`` / ``click_long`` (all clicks on main blocks / those with
        dwell time >= 15), ``all1tv`` / ``all1tv_long`` (the same counters
        restricted to blocks whose result is a 1tv.ru or ntv.ru url),
        ``wizard`` (dwell time of the last click on the wizard path, or
        ``''`` when there was none), ``1tv`` and ``ntv`` (booleans).
    """
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Best-effort: a session that cannot be parsed produces no rows.
        # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
        # still propagate.
        return

    for request in session:
        if not request.IsA('TYandexWebRequest'):
            continue

        # Slicing is a no-op for queries of 1000 characters or fewer.
        query = request.Query.lower()[:1000]

        sprops = 'UPPER.TVTranslations.tv_shows' in request.SearchPropsValues

        # Dwell time of the click on this blockstat path; if there are
        # several such clicks the last one wins.
        # NOTE(review): presumably this path identifies the TV wizard.
        wiztimes = ''
        for click in request.GetClicks():
            if click.Path == '254.273.1273.1345.184':
                wiztimes = click.DwellTime

        show = False
        tv1 = False
        ntv = False
        allclicks = 0
        allclicks_long = 0
        all1tv = 0
        all1tv_long = 0

        for block in request.GetMainBlocks():
            res = block.GetMainResult()

            if res.IsA("TBlenderWizardResult") or res.IsA("TWizardResult"):
                if "snippet/tv/online/channel" in res.Path:
                    show = True

            # True when this block's main result is a 1tv.ru / ntv.ru url.
            # NOTE(review): plain substring match on the whole url, so other
            # urls containing these strings also count.
            tv_site_block = False
            if res.IsA("TWebResult"):
                if '1tv.ru' in res.Url:
                    tv1 = True
                    tv_site_block = True
                if 'ntv.ru' in res.Url:
                    ntv = True
                    tv_site_block = True

            for click in block.GetClicks():
                # NOTE(review): threshold of 15 is presumably in seconds.
                is_long = int(click.DwellTime) >= 15
                allclicks += 1
                if is_long:
                    allclicks_long += 1
                if tv_site_block:
                    all1tv += 1
                    if is_long:
                        all1tv_long += 1

        if (show or sprops) and (tv1 or ntv):
            yield {'query': query,
                   'clicks': allclicks,
                   'click_long': allclicks_long,
                   'all1tv': all1tv,
                   'all1tv_long': all1tv_long,
                   'wizard': wiztimes,
                   '1tv': tv1,
                   'ntv': ntv}


def _dwell_bucket(dwell):
    """Return the ``wiztimes`` histogram key for an integer dwell time."""
    if dwell < 15:
        return 'short'
    if dwell < 30:
        return '15'
    if dwell < 60:
        return '30'
    return '60'


def main():
    """Run the per-day reduce and print click totals for 1tv / ntv.

    For every day in a fixed list the ``extract`` reducer is run over that
    day's user-sessions table into a per-day output table (created first if
    it does not exist). The output rows are then read back and summed into
    counters keyed by site, which are printed once all days are processed.

    A row is attributed to ``'1tv'`` when its ``1tv`` flag is set, otherwise
    to ``'ntv'`` when its ``ntv`` flag is set.
    """
    args = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': args.server}})

    days = ['2017-05-13', '2017-05-14', '2017-05-15', '2017-05-16',
            '2017-05-17', '2017-05-18', '2017-05-19']
    clicks = {'1tv': 0, 'ntv': 0}
    clicks_long = {'1tv': 0, 'ntv': 0}
    all1tv = {'1tv': 0, 'ntv': 0}
    all1tv_long = {'1tv': 0, 'ntv': 0}
    wiztimes = {'1tv': {'short': 0, '15': 0, '30': 0, '60': 0},
                'ntv': {'short': 0, '15': 0, '30': 0, '60': 0}}

    for day in days:
        usersessions = '//user_sessions/pub/search/daily/' + day + '/clean'
        output = '//home/freshness/staff/itajn/FU-3697/' + day
        if not yt.exists(output):
            yt.create_table(path=output, recursive=True)
        yt.run_reduce(extract,
                      source_table=usersessions,
                      destination_table=output,
                      local_files=[args.blockstat],
                      spec={'data_size_per_job': 16000000000},
                      reduce_by='key')

        for row in yt.read_table(output):
            if row['1tv']:
                site = '1tv'
            elif row['ntv']:
                site = 'ntv'
            else:
                # Neither flag set: skip the row instead of failing on an
                # unbound name or reusing the previous row's site.
                continue

            clicks[site] += row['clicks']
            clicks_long[site] += row['click_long']
            all1tv[site] += row['all1tv']
            all1tv_long[site] += row['all1tv_long']

            # An empty string means there was no wizard click in the request.
            if row['wizard'] != '':
                wiztimes[site][_dwell_bucket(int(row['wizard']))] += 1

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(clicks)
    print(clicks_long)
    print(all1tv)
    print(all1tv_long)
    print(wiztimes)


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
