#!/usr/bin/env python2.7
# coding=utf-8
'''
Compute the average number of shows per user for each browser from redir-log.
https://wiki.yandex-team.ru/visualbookmarks/tz/usermetrics/dashboard/#pokazynapolzovatelja
'''
from __future__ import division
from datetime import datetime, timedelta
import sys
from helpers import update_stat, daterange

# Import and configure the YT client.
# Only ImportError is tolerated: the module can then still be imported (e.g.
# to exercise the pure mapper/reducer functions locally) with ``yt`` bound to
# None, instead of crashing with a NameError on the first ``yt.config`` use.
try:
    import yt.wrapper as yt
except ImportError:
    yt = None
    print('yt.wrapper not imported (probably script started locally)')
else:
    # NOTE(review): hard-coded credential committed to source. It should be
    # revoked and supplied from outside the repository instead.
    yt.config['token'] = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'
    yt.config.set_proxy('hahn.yt.yandex.net')
    yt.config["pickling"]["enable_tmpfs_archive"] = False


# Location of the Stat report that receives the computed values.
STAT_HOST = 'https://stat.yandex-team.ru/'
STAT_PATH = 'Distribution/vbch/shows'
STAT_URL = STAT_HOST + STAT_PATH

# Accepted ``cid`` values mapped to the short browser name reported to Stat
# (presumably Firefox / Internet Explorer / Chrome -- confirm).
CIDS = {'72480': 'fx', '72481': 'ie', '72482': 'ch'}


def _filter(rec):
    splitstrs = (item.split('=', 1) for item in rec['value'].split('\t'))
    value = {name: value for name, value in splitstrs}
    try:
        assert value.get('dtype') == 'stred'
        assert value.get('pid') == '12'
        assert value.get('cid') in CIDS
        assert value.get('path').endswith('.system.show')
        assert value.get('yandexuid')
    except AssertionError:
        return
    yield {key: value[key] for key in ('yandexuid', 'cid')}


def map_cid_to_bro(rec):
    '''
    Mapper: replace the record's ``cid`` with the browser name from ``CIDS``.

    Yields one dict with the keys ``yandexuid`` and ``browser``.
    Raises KeyError when ``rec['cid']`` is not a key of ``CIDS``.
    '''
    bro_name = CIDS[rec['cid']]
    row = {'yandexuid': rec['yandexuid'], 'browser': bro_name}
    yield row


def sum_shows_for_each_user(key, recs):
    '''
    Reducer: count the records of one reduce group.

    ``key`` must contain ``browser``; ``recs`` is the iterable of records of
    the group. Yields one dict with ``browser`` and the record count in
    ``shows_per_user``.
    '''
    count = 0
    for _ in recs:
        count += 1
    row = {'browser': key['browser'], 'shows_per_user': count}
    yield row


def avg_for_browser_shows_per_user(key, recs):
    '''
    Reducer: average ``shows_per_user`` over the records of one browser.

    Each record's ``shows_per_user`` is converted with ``int``. Yields one
    dict with ``browser`` and the mean in ``avg_shows``. Relies on the
    file-level ``from __future__ import division`` for true division.
    Raises ZeroDivisionError when ``recs`` is empty.
    '''
    total = 0
    users = 0
    for rec in recs:
        total += int(rec['shows_per_user'])
        users += 1
    mean = total / users
    yield {'browser': key['browser'], 'avg_shows': mean}


def main():
    '''
    Run the shows-per-user pipeline for every day in the requested range.

    Usage: ``script [datefrom [dateto]]``. Each argument defaults to
    yesterday formatted as ``%Y-%m-%d`` and is passed unchanged to
    ``daterange`` (presumably an inclusive range of day strings -- confirm
    against ``helpers``).

    For each day:

    1. map ``//statbox/redir-log/<day>`` through ``_filter``;
    2. map the result through ``map_cid_to_bro``;
    3. sort and reduce by (yandexuid, browser) to count shows per user;
    4. sort and reduce by browser to average those counts;
    5. read the result, stamp every row with ``fielddate`` and hand it to
       ``update_stat``;
    6. erase the intermediate tables.
    '''
    source_table_prefix = '//statbox/redir-log/'
    yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')

    # Explicit length checks instead of bare ``except:`` blocks, which would
    # also swallow unrelated errors such as KeyboardInterrupt.
    args = sys.argv[1:]
    datefrom = args[0] if len(args) > 0 else yesterday
    dateto = args[1] if len(args) > 1 else yesterday

    # Intermediate tables are the same for every day, so define them once.
    filtered = '//home/atom/chikachoff/vb/shows_filtered'
    mapped = '//home/atom/chikachoff/vb/shows_mapped'
    shows_per_user = '//home/atom/chikachoff/vb/shows_shows_per_user'
    avg_shows_per_bro = '//home/atom/chikachoff/vb/shows_avg_shows_per_bro'
    intermediate_tables = (filtered, mapped, shows_per_user, avg_shows_per_bro)

    for day in daterange(datefrom, dateto):
        redirlog_table = source_table_prefix + day

        yt.run_map(
            _filter,
            source_table=redirlog_table,
            destination_table=filtered,
            format=yt.DsvFormat()
        )
        yt.run_map(
            map_cid_to_bro,
            source_table=filtered,
            destination_table=mapped,
            format=yt.DsvFormat(),
        )

        # Reduce requires its input sorted by the reduce key.
        yt.run_sort(mapped, mapped, sort_by=['yandexuid', 'browser'])
        yt.run_reduce(
            sum_shows_for_each_user,
            source_table=mapped,
            destination_table=shows_per_user,
            format=yt.DsvFormat(),
            reduce_by=['yandexuid', 'browser'],
        )

        yt.run_sort(shows_per_user, shows_per_user, sort_by=['browser'])
        yt.run_reduce(
            avg_for_browser_shows_per_user,
            source_table=shows_per_user,
            destination_table=avg_shows_per_bro,
            format=yt.DsvFormat(),
            reduce_by=['browser'],
        )

        data = list(yt.read_table(avg_shows_per_bro, format='dsv', raw=False))
        for rec in data:
            rec['fielddate'] = day
            print(rec)

        update_stat(data, STAT_PATH)
        print('Portal updated date: {}. Url: {}'.format(day, STAT_URL))

        for table in intermediate_tables:
            yt.run_erase(table)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
