#!/usr/bin/env python2.7
# coding=utf-8
'''
Counts daily users who performed navigation events, based on redir_log.
https://wiki.yandex-team.ru/visualbookmarks/tz/usermetrics/dashboard/#navigejjtynapolzovatelja
'''
from __future__ import division
from datetime import datetime, timedelta
import sys
from helpers import update_stat, daterange
# Import the YT client and configure its token; if anything in the guarded
# section fails, a message is printed instead of a traceback.
try:
    import yt.wrapper as yt
    # NOTE(review): credential is hard-coded in source. Consider loading it
    # from the environment or a token file and rotating this value.
    yt.config['token'] = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'
except:
    # NOTE(review): the bare except also hides errors other than ImportError.
    print 'yt.wrapper not imported (probably script started locally)'

# NOTE(review): if the import above failed, `yt` is undefined here and the
# next line raises NameError, so the fallback message does not keep the
# script running.
yt.config.set_proxy('hahn.yt.yandex.net')
yt.config["pickling"]["enable_tmpfs_archive"] = False


# Location of the report: STAT_PATH is what main() hands to update_stat(),
# STAT_URL is only used in the progress message printed after each upload.
STAT_HOST = 'https://stat.yandex-team.ru/'
STAT_PATH = 'Distribution/vbch/dau'
STAT_URL = STAT_HOST + STAT_PATH

# Accepted redir-log ``cid`` values mapped to the short browser name that is
# written to the report (presumably Firefox / IE / Chrome -- confirm).
CIDS = {
    '72480': 'fx',
    '72481': 'ie',
    '72482': 'ch',
}


def _filter(rec):
    splitstrs = set(
        tuple(item.split('=', 1))
        for item in rec['value'].split('\t')
    )
    value_dict = {
        name: value
        for name, value in splitstrs
    }
    try:
        assert value_dict.get('dtype') == 'stred'
        assert value_dict.get('pid') == '12'
        assert value_dict.get('cid') in CIDS
        assert value_dict.get('path')
        assert value_dict.get('yandexuid')
    except AssertionError:
        return
    yield {
        key: value_dict[key]
        for key in ('yandexuid', 'cid', 'path')
    }


def map_events(rec):
    '''
    Mapper: emit one row for every record whose path is a navigation event.

    A record qualifies when ``rec['path']`` contains any of the known event
    names as a substring. The emitted row carries the user id, the browser
    name looked up in ``CIDS`` (falling back to the raw ``cid``) and the
    version, taken as the part of the path before the first dot.
    '''
    path = rec['path']
    navigate_markers = (
        'panel.bookmarks',
        'panel.bookpanel.folder',
        'panel.bookpanel.link',
        'panel.closedtabs',
        'panel.downloads',
        'panel.history',
        'panel.logo',
        'panel.search.button',
        'panel.search.enter',
        'panel.search.suggest',
        'panel.settings',
        'thumb.click',
    )
    for marker in navigate_markers:
        if marker in path:
            break
    else:
        # No known event name occurs in the path: nothing to emit.
        return

    cid = rec['cid']
    yield {
        'yandexuid': rec['yandexuid'],
        'browser': CIDS.get(cid, cid),
        'version': path.split('.')[0],
    }


def get_uniq_users(key, recs):
    '''
    Reducer: collapse one group of records into a single row.

    Only the first record of the group is used; its ``browser`` and
    ``version`` are emitted. The remaining records are read and discarded.
    ``key`` is not used.
    '''
    stream = iter(recs)
    for first in stream:
        yield {
            'browser': first['browser'],
            'version': first['version'],
        }
        break
    # Consume the rest of the group without emitting anything.
    for _ in stream:
        pass


def sum_users(key, recs):
    '''
    Reducer: count the records in one (browser, version) group.

    Yields a single row with the group's ``browser`` and ``version`` taken
    from ``key`` and the record count stored under ``dau``.
    '''
    count = 0
    for _ in recs:
        count += 1
    row = {'browser': key['browser']}
    row['version'] = key['version']
    row['dau'] = count
    yield row


def add_totals(data):
    '''
    Yield one ``_total_`` row per browser with the summed ``dau``.

    ``data`` is any iterable of rows with ``browser`` and ``dau`` keys;
    ``dau`` may be an int or a numeric string. Each yielded row has the
    browser name, ``version`` set to ``'_total_'`` and the integer sum.

    All totals are computed in a single pass before the first row is
    yielded, so the result is unaffected if the caller appends the yielded
    rows to ``data``, and one-shot iterables are supported. Rows come out in
    the order in which each browser first appears in ``data``.
    '''
    order = []
    totals = {}
    for line in data:
        browser = line['browser']
        if browser not in totals:
            totals[browser] = 0
            order.append(browser)
        totals[browser] += int(line['dau'])

    for browser in order:
        yield {
            'browser': browser,
            'version': '_total_',
            'dau': totals[browser],
        }


def main():
    '''
    Compute DAU per browser and version for each day of a date range and
    send the result to the statistics report at ``STAT_PATH``.

    Command line: ``script [datefrom [dateto]]``. Each missing argument
    defaults to yesterday formatted as ``YYYY-MM-DD``; the values are passed
    unchanged to ``daterange``.

    For every day the pipeline is:
      1. ``_filter`` over the day's redir-log table;
      2. ``map_events`` to keep navigation events;
      3. sort + ``get_uniq_users`` reduced by ``yandexuid``;
      4. sort + ``sum_users`` reduced by ``browser``/``version``;
      5. read the result, add per-browser totals, stamp ``fielddate`` and
         upload via ``update_stat``;
      6. erase the intermediate tables.
    '''
    source_table_prefix = '//statbox/redir-log/'
    yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')

    # Explicit length checks instead of a bare ``except`` around indexing.
    args = sys.argv[1:]
    datefrom = args[0] if len(args) > 0 else yesterday
    dateto = args[1] if len(args) > 1 else yesterday

    # Intermediate tables are the same for every day, so define them once.
    filtered = '//home/atom/chikachoff/vb/dau_filtered'
    mapped = '//home/atom/chikachoff/vb/dau_mapped'
    uniq_users = '//home/atom/chikachoff/vb/dau_uniq_users'
    dau = '//home/atom/chikachoff/vb/dau_dau'

    for day in daterange(datefrom, dateto):
        redirlog_table = source_table_prefix + day

        yt.run_map(
            _filter,
            source_table=redirlog_table,
            destination_table=filtered,
            format=yt.DsvFormat()
        )
        yt.run_map(
            map_events,
            source_table=filtered,
            destination_table=mapped,
            format=yt.DsvFormat(),
        )
        # Sorting by browser/version as well fixes which record is first in
        # each yandexuid group, and that is the one get_uniq_users emits.
        yt.run_sort(
            mapped,
            mapped,
            sort_by=[
                'yandexuid',
                'browser',
                'version'
            ]
        )
        yt.run_reduce(
            get_uniq_users,
            source_table=mapped,
            destination_table=uniq_users,
            format=yt.DsvFormat(),
            reduce_by=['yandexuid']
        )
        yt.run_sort(
            uniq_users,
            uniq_users,
            sort_by=[
                'browser',
                'version'
            ]
        )
        yt.run_reduce(
            sum_users,
            source_table=uniq_users,
            destination_table=dau,
            format=yt.DsvFormat(),
            reduce_by=[
                'browser',
                'version'
            ]
        )

        data = list(yt.read_table(dau, format='dsv', raw=False))

        # Materialise the totals first so that ``data`` is not modified
        # while add_totals is still reading it.
        totals = list(add_totals(data))
        data.extend(totals)

        for rec in data:
            rec['fielddate'] = day

        update_stat(data, STAT_PATH)
        # Single parenthesised argument: prints the same text whether
        # ``print`` is the Python 2 statement or the function.
        print('Portal updated date: {}. Url: {}'.format(day, STAT_URL))

        yt.run_erase(filtered)
        yt.run_erase(mapped)
        yt.run_erase(uniq_users)
        yt.run_erase(dau)


# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
