#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
import yt.wrapper as yt
import sys
import libra
import argparse

# Upper bound on a web result's 'FreshAge' marker for the result to be counted
# as "fresh" in countsurplus. 259200 == 3 * 24 * 60 * 60, so this is presumably
# three days expressed in seconds -- TODO confirm the unit of the marker.
fage = 3 * 24 * 60 * 60

# Wizard names that countsurplus matches against the Name of wizard results.
wizards = ['news', 'video', 'images', 'maps', 'adresa']

def HandleOption():
    """Build the command-line parser of this script.

    Returns:
        An ``argparse.ArgumentParser`` with two optional arguments:
        ``--server`` (stored as ``server``) and ``--bs`` (stored as
        ``blockstat``), each with a built-in default value.
    """
    option_table = (
        ("--server", {
            'dest': "server",
            'help': "yt server",
            'default': 'hahn.yt.yandex.net',
            'required': False,
        }),
        ("--bs", {
            'dest': "blockstat",
            'help': "path to blockstat.dict",
            'default': '/home/itajn/serploader/blockstat.dict',
            'required': False,
        }),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli

def countsurplus(key, recs):
    """Reducer: emit the per-request click "surplus" of wizard, fresh and web blocks.

    The records of one reduce key are parsed into a session with
    ``libra.ParseSession`` (using ``./blockstat.dict``). A request is
    processed only if it is a desktop, mobile or touch Yandex web request
    whose ``ServiceDomRegion`` is ``'ru'`` or ``None``; requests whose search
    props contain randomized blender intents are logged to stderr and skipped.

    Every main block of a request is classified as one of the ``wizards``,
    as ``'fresh'`` (a web result whose ``FreshAge`` marker is at most
    ``fage``) or as ``'web'`` (any other web result). The number of clicks on
    the block with ``DwellTime >= 15`` is added to the "win" of that kind.
    The same number is then added to the "loss" of the first kind (in the
    order ``wizards + ['fresh', 'web']``) that is still flagged as seen on an
    earlier block and differs from the current block's kind; that flag is
    cleared afterwards.

    Args:
        key: reduce key supplied by the framework; not used.
        recs: records of that key, handed to ``libra.ParseSession``.

    Yields:
        One dict per accepted request that showed at least one non-web kind:
        ``'query'`` (lower-cased, truncated to a length of 4095), ``'ip'``
        (the ``UPPER.Fresh.IntentProbability`` search prop as float, 0.0 when
        absent), ``'freshmax'`` (smallest ``Position`` among fresh blocks,
        241 when there is none), ``'freshcount'`` (number of fresh blocks),
        and ``win - k * loss`` under the name of every kind that was shown.
    """
    try:
        session = libra.ParseSession(recs, './blockstat.dict')
    except Exception:
        # Deliberate best-effort: a session that cannot be parsed is dropped.
        # Only Exception is caught so that SystemExit / KeyboardInterrupt
        # are no longer swallowed.
        return

    kinds = wizards + ['fresh', 'web']
    # taken from GetSurplusBlockHeightModifier
    height_modifier = {
        'fresh': 1.0,
        'web': 1.0,
        'news': 0.93,
        'maps': 1.2,
        'adresa': 1.2,
        'images': 2.0,
        'video': 1.5,
    }

    for request in session:
        if not (request.IsA("TYandexWebRequest")
                or request.IsA("TMobileYandexWebRequest")
                or request.IsA("TTouchYandexWebRequest")):
            continue

        # Membership test keeps the original semantics: accept 'ru' or None.
        if request.ServiceDomRegion not in ('ru', None):
            continue

        # Default for requests without TMiscRequestProperties; previously
        # `ip` was left unbound (NameError) or kept the value of the
        # preceding request in that case.
        ip = 0.0
        if request.IsA('TMiscRequestProperties'):
            sprops = request.SearchPropsValues
            if ('UPPER.ApplyBlender.RandomizedIntents' in sprops
                    or 'ApplyBlender.RandomizedIntents' in sprops):
                # Same text as the former `print >> sys.stderr` statement.
                sys.stderr.write('Locrandom caught! %s\n' % (sprops,))
                continue
            if 'UPPER.Fresh.IntentProbability' in sprops:
                ip = float(sprops['UPPER.Fresh.IntentProbability'])

        query = request.Query.lower()[:4095]

        win = dict((w, 0) for w in kinds)
        loss = dict((w, 0) for w in kinds)
        hasit = dict((w, False) for w in kinds)  # seen and not yet "beaten"
        was = dict((w, False) for w in kinds)    # seen anywhere in the request

        freshmax = 241  # sentinel meaning "no fresh block"; glue() tests `< 241`
        freshcount = 0

        for block in request.GetMainBlocks():
            long_clicks = sum(1 for click in block.GetClicks()
                              if int(click.DwellTime) >= 15)

            res = block.GetMainResult()
            shown = None  # kind of this block, None when it is not classified
            if res.IsA("TBlenderWizardResult") or res.IsA("TWizardResult"):
                for w in wizards:
                    if res.Name == w:
                        shown = w
                        break
            elif res.IsA("TWebResult"):
                markers = res.Markers
                if "FreshAge" in markers and int(markers['FreshAge']) <= fage:
                    shown = 'fresh'
                    if block.Position < freshmax:
                        freshmax = block.Position
                    freshcount += 1
                else:
                    shown = 'web'

            if shown is not None:
                win[shown] += long_clicks
                hasit[shown] = True
                was[shown] = True

            # Charge this block's long clicks to at most one other kind that
            # is still flagged, then clear that flag.
            for w in kinds:
                if hasit[w] and shown != w:
                    loss[w] += long_clicks
                    hasit[w] = False
                    break

        result = {'query': query, 'ip': ip,
                  'freshmax': freshmax, 'freshcount': freshcount}
        something = False
        for tp in kinds:
            if was[tp]:
                result[tp] = win[tp] - height_modifier[tp] * loss[tp]
                if tp != 'web':
                    something = True
        # Requests that showed nothing but plain web results are not emitted.
        if something:
            yield result

def glue(key, recs):
    """Reducer: aggregate the per-request rows of one query into a single row.

    Args:
        key: reduce key; ``key['query']`` is copied into the output row.
        recs: iterable of dicts carrying ``'ip'``, ``'freshcount'`` and
            ``'freshmax'``, plus an optional value under each name from
            ``wizards + ['fresh', 'web']`` (the rows yielded by
            ``countsurplus``).

    Yields:
        A single dict with ``'query'``, ``'count'`` (number of rows),
        ``'ip_avg'``, ``'avg_freshmax'``, ``'avg_freshcount'``, the summed
        value of every kind under its own name and the number of rows that
        contained the kind under ``'<kind>_shows'``. Nothing is yielded when
        ``recs`` is empty.

    Note:
        The averages use ``/`` and rely on the file-level
        ``from __future__ import division`` for true division on Python 2.
    """
    kinds = wizards + ['fresh', 'web']
    other = dict((w, 0) for w in kinds)
    shows = dict((w + '_shows', 0) for w in kinds)

    count = 0
    ip = 0.0
    avgfreshcount = 0
    avgfreshmax = -1  # -1 until the first row with a fresh block is seen

    for r in recs:
        count += 1
        ip += r['ip']
        avgfreshcount += r['freshcount']
        # 241 is the "no fresh block" sentinel written by countsurplus.
        if r['freshmax'] < 241:
            if avgfreshmax == -1:
                avgfreshmax = r['freshmax']
            else:
                avgfreshmax += r['freshmax']
        for w in kinds:
            if w in r:
                other[w] += r[w]
                # Fixed: was `shows[+'_shows']`, a unary plus on a string
                # that raised TypeError on the first matching row.
                shows[w + '_shows'] += 1

    if not count:
        # No rows: avoid dividing by zero and emit nothing.
        return

    # NOTE(review): avg_freshmax is divided by the number of all rows, not
    # only those with a fresh block, and equals -1 / count when no row had
    # one. Kept as in the original -- confirm the intended definition.
    result = {'query': key['query'],
              'count': count,
              'ip_avg': ip / count,
              'avg_freshmax': avgfreshmax / count,
              'avg_freshcount': avgfreshcount / count,
              }
    result.update(other)
    result.update(shows)
    yield result

def main():
    """Run the surplus pipeline on YT for every hard-coded day.

    For each day the user-session table is reduced by ``'key'`` with
    ``countsurplus`` into a per-day table (created first when missing), that
    table is sorted in place by ``'query'``, and it is then reduced by
    ``'query'`` with ``glue`` into the ``<day>_agg`` table. The YT proxy and
    the blockstat dictionary shipped with the first reduce come from the
    command-line options.
    """
    options = HandleOption().parse_args()
    yt.update_config({'proxy': {'url': options.server}})

    days = [
        '2016-07-11', '2016-07-12', '2016-07-13', '2016-07-14',
        '2016-07-15', '2016-07-16', '2016-07-17', '2016-07-18',
        '2016-07-19', '2016-07-20', '2016-07-21', '2016-07-22',
        '2016-07-23', '2016-07-24',
    ]
    results_root = '//home/freshness/staff/itajn/FR-2363/'

    for day in days:
        sessions_table = '//user_sessions/pub/search/daily/' + day + '/clean'
        per_request_table = results_root + day
        per_query_table = results_root + day + '_agg'

        if not yt.exists(per_request_table):
            yt.create_table(path=per_request_table, recursive=True)

        # Step 1: one row per accepted request.
        yt.run_reduce(
            countsurplus,
            source_table=sessions_table,
            destination_table=per_request_table,
            local_files=[options.blockstat],
            reduce_by='key',
            spec={'data_size_per_job': 16000000000},  # ~16GB
        )
        # Step 2: sort in place so the rows can be reduced by query.
        yt.run_sort(
            source_table=per_request_table,
            destination_table=per_request_table,
            sort_by='query',
        )
        # Step 3: one aggregated row per query.
        yt.run_reduce(
            glue,
            source_table=per_request_table,
            destination_table=per_query_table,
            reduce_by='query',
        )

# Run the pipeline only when the file is executed as a script, not on import.
if __name__ ==  '__main__':
    main()
