#!/usr/bin/env python2.7
# coding=utf-8
'''
Counts navigations (thumb clicks relative to shows, per browser) from redir_log.
https://wiki.yandex-team.ru/visualbookmarks/tz/usermetrics/dashboard/#navigejjtynapolzovatelja
'''
from __future__ import division
from datetime import datetime, timedelta
from collections import defaultdict
import sys
from helpers import module_filter, update_stat, daterange
# Import the YT client and configure it for this job. The configuration lives
# in the `else` branch so that a missing client only prints the notice below
# instead of failing with NameError on the first `yt.config` access.
try:
    import yt.wrapper as yt
except ImportError:
    print('yt.wrapper not imported (probably script started locally)')
else:
    # NOTE(security): credential committed in source. Left unchanged so the
    # job keeps working, but it should be rotated and supplied from outside
    # the repository (environment / token file).
    yt.config['token'] = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'

    yt.config["auto_merge_output"]["action"] = "merge"
    # module_filter comes from the local helpers module.
    yt.config["pickling"]["module_filter"] = module_filter
    yt.config["pickling"]["enable_tmpfs_archive"] = False
    yt.config.set_proxy('hahn.yt.yandex.net')
    yt.config.CREATE_RECURSIVE = True
    yt.config.TREAT_UNEXISTING_AS_EMPTY = True


# Stat report location; STAT_URL is simply host and path glued together.
STAT_HOST = 'https://stat.yandex-team.ru/'
STAT_PATH = 'Distribution/vbch/ctr'
STAT_URL = STAT_HOST + STAT_PATH

# `cid` value of a log record -> short browser code written to the 'browser'
# column by _filter. Records whose cid is not listed here are dropped.
CIDS = {
    '72480': 'fx',
    '72481': 'ie',
    '72482': 'ch',
}


def _filter(rec):
    clicked_thumb = ''
    has_show = 0
    splitstrs = (item.split('=', 1) for item in rec['value'].split('\t'))
    value = {name: value for name, value in splitstrs}
    try:
        assert value.get('dtype') == 'stred'
        assert value.get('pid') == '12'
        assert value.get('cid') in CIDS
    except AssertionError:
        return

    path = value.get('path')
    if path:
        if 'thumb.click.' in path:
            substr_after_thumbclick = path.split('thumb.click.')[1]
            clicked_thumb = substr_after_thumbclick.split('.')[0]
        if path.endswith('.system.show'):
            has_show = 1

    yield {
        'browser': CIDS[value['cid']],
        'clicked_thumb': clicked_thumb,
        'has_show': has_show
    }


def sum_shows_clicks(key, recs):
    '''
    Reducer: turn the filtered records of one browser into click rates.

    key  -- reduce key; only key['browser'] is read.
    recs -- iterable of records with 'has_show' (int or numeric string) and
            'clicked_thumb' ('' when the record is not a thumb click).

    Yields a single dict holding 'browser', 'shows' (sum of has_show) and,
    for every non-empty clicked_thumb value, the number of its clicks divided
    by shows. Nothing is yielded when the group has no shows, since the
    rate is undefined then.
    '''
    shows = 0
    clicks = defaultdict(int)
    for rec in recs:
        shows += int(rec['has_show'])
        thumb = rec['clicked_thumb']
        # Records without a click carry '' here; counting them would put an
        # empty column name into the output row.
        if thumb:
            clicks[thumb] += 1

    if shows == 0:
        return

    # float() keeps this a true division even without the module-level
    # `from __future__ import division`.
    result = {thumb: count / float(shows) for thumb, count in clicks.items()}
    # dict.update() returns None, so the row is built and filled in place
    # instead of using the return value of update().
    result['browser'] = key['browser']
    result['shows'] = shows
    yield result


def main():
    '''
    Run the filter -> sort -> reduce pipeline for every day in a date range
    and print the resulting rows.

    Command line: [datefrom [dateto]]. Each missing argument defaults to
    yesterday, formatted as YYYY-MM-DD. The values are passed as-is to the
    daterange helper; each `day` it produces is appended to the source table
    prefix (NOTE(review): presumably the same YYYY-MM-DD strings -- confirm
    in helpers).
    '''
    source_table_prefix = '//statbox/redir-log/'
    # Intermediate tables are shared by all days, so they are erased before
    # each day is processed.
    filtered = '//home/ranking/chikachoff/vb/ctr_thumb/filtered'
    reduced = '//home/ranking/chikachoff/vb/ctr_thumb/reduced'

    yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    # Explicit length checks instead of a bare `except:` around sys.argv[i],
    # which would also hide unrelated errors.
    cli_args = sys.argv[1:]
    datefrom = cli_args[0] if len(cli_args) > 0 else yesterday
    dateto = cli_args[1] if len(cli_args) > 1 else yesterday

    for day in daterange(datefrom, dateto):
        redirlog_table = source_table_prefix + day

        yt.run_erase(filtered)
        yt.run_erase(reduced)

        yt.run_map(
            _filter,
            source_table=redirlog_table,
            destination_table=filtered,
            format=yt.DsvFormat()
        )

        yt.run_sort(filtered, filtered, sort_by=['browser', 'has_show',
                                                 'clicked_thumb'])

        yt.run_reduce(
            sum_shows_clicks,
            source_table=filtered,
            destination_table=reduced,
            format=yt.DsvFormat(),
            reduce_by=['browser']
        )

        data = list(yt.read_table(reduced, format='dsv', raw=False))

        for rec in data:
            rec['fielddate'] = day
            print(rec)

        # Upload to the stat portal and final cleanup are currently disabled;
        # the rows are only printed.
        # update_stat(data, STAT_PATH)
        # print 'Portal updated date: {}. Url: {}'.format(day, STAT_URL)

        # yt.run_erase(filtered)
        # yt.run_erase(reduced)


if __name__ == '__main__':
    main()
