#!/usr/bin/env python2.7
# coding=utf-8
from __future__ import division
from helpers import module_filter, update_stat, daterange
from datetime import datetime, timedelta
import sys
from datetime import date
from datetime import timedelta

# The YT client may be missing (the message below suggests local runs). Bind
# ``yt`` to None in that case so the module still imports, instead of dying
# with a NameError on the first ``yt.config`` access.
try:
    import yt.wrapper as yt
except ImportError:
    yt = None
    print('yt.wrapper not imported (probably script started locally)')

if yt is not None:
    # Client-wide settings; only applicable when the client was imported.
    yt.config.TOKEN_PATH = "/.yt/token"
    yt.config["auto_merge_output"]["action"] = "merge"
    yt.config["pickling"]["module_filter"] = module_filter
    yt.config.set_proxy('hahn.yt.yandex.net')
    yt.config.CREATE_RECURSIVE = True
    yt.config.TREAT_UNEXISTING_AS_EMPTY = True
    # yt.config["pickling"]["enable_rankingfs_archive"] = False

# Location of the target report: host, report path, and the two joined.
STAT_HOST = 'https://stat.yandex-team.ru/'
STAT_PATH = 'Soft/Others/AltSearch_visits_to_google'
STAT_URL = STAT_HOST + STAT_PATH

def _filter(rec):
    """Map one log record to a ``{'serp', 'tm'}`` row, or to nothing.

    ``rec['value']`` is parsed as tab-separated ``name=value`` fields and
    ``rec['key']`` as space-separated tokens.  A row is produced only when
    ``dtype``/``pid``/``cid`` equal ``'stred'``/``'12'``/``'72873'``, both
    ``path`` and ``yandexuid`` are non-empty, and the fourth dot-separated
    component of ``path`` is one of ``google``, ``yandex``, ``bing``, ``mail``.

    Malformed input (a field without ``=``, a key with fewer than two tokens)
    is skipped rather than allowed to raise and abort the job.

    Args:
        rec: mapping with string entries ``'key'`` and ``'value'``.

    Yields:
        dict: ``{'serp': <fourth path component>,
        'tm': <second-to-last token of rec['key']>}``.

    Raises:
        KeyError: if ``rec`` has no ``'key'`` or ``'value'`` entry.
    """
    fields = {}
    for item in rec['value'].split('\t'):
        name, sep, val = item.partition('=')
        # Items without '=' carry no value; ignore them instead of failing.
        if sep:
            fields[name] = val

    key_tokens = rec['key'].split(' ')
    if len(key_tokens) < 2:
        return

    # Explicit comparisons rather than ``assert``: assertions are removed
    # under ``python -O``, which would silently disable the filter.
    if fields.get('dtype') != 'stred':
        return
    if fields.get('pid') != '12' or fields.get('cid') != '72873':
        return

    path = fields.get('path')
    if not path or not fields.get('yandexuid'):
        return

    path_parts = path.split('.')
    if len(path_parts) < 4:
        return

    serp = path_parts[3]
    if serp not in ('google', 'yandex', 'bing', 'mail'):
        return

    yield {'serp': serp, 'tm': key_tokens[-2]}

def reduce(key, recs):
    """Count the records of one reduce group.

    Args:
        key: group key; its ``'serp'`` entry becomes the output column name.
        recs: iterable of the group's records (only their number matters).

    Yields:
        dict: a single row ``{<key['serp']>: <number of records>}``.
    """
    count = 0
    for _ in recs:
        count += 1
    row = {key['serp']: count}
    yield row

def main(datefrom='2016-11-23', dateto='2016-11-24'):
    """Compute per-day counts per search engine and upload them.

    For every day produced by ``daterange(datefrom, dateto)`` the day's
    ``//statbox/redir-log/<day>`` table is mapped with ``_filter``, sorted
    and reduced by ``serp`` with ``reduce``; the resulting rows are merged
    into one dict, tagged with ``fielddate`` and passed to ``update_stat``.

    Args:
        datefrom: first argument forwarded to ``daterange``.
        dateto: second argument forwarded to ``daterange``.
    """
    source_table_prefix = '//statbox/redir-log/'
    filtered = '//home/tr-analysts/kosotis/stat_serp_map_day'
    shows_per_user = '//home/tr-analysts/kosotis/stat_serp_reduce_day'
    scratch_tables = (filtered, shows_per_user)

    for day in daterange(datefrom, dateto):
        redirlog_table = source_table_prefix + day
        print('Date: ' + day)

        # Drop leftovers of a previous run.  ``force=True`` ignores missing
        # paths, so each table is handled independently and genuine errors
        # are no longer silently swallowed.
        for table in scratch_tables:
            yt.remove(table, recursive=True, force=True)

        try:
            yt.run_map(_filter, source_table=redirlog_table,
                       destination_table=filtered, format=yt.DsvFormat())
            yt.run_sort(filtered, filtered, sort_by=['serp'])
            yt.run_reduce(reduce, source_table=filtered,
                          destination_table=shows_per_user,
                          format=yt.DsvFormat(), reduce_by=['serp'])
            # NOTE(review): the reduce rows carry no 'serp' column and are
            # merged into a single dict below, so this sort looks redundant.
            # Kept to preserve the original sequence of operations - confirm.
            yt.run_sort(shows_per_user, shows_per_user, sort_by=['serp'])

            # Each reduce row holds one {serp: count} pair; fold them into a
            # single row for the day.
            stat = {}
            for rec in yt.read_table(shows_per_user, format='dsv', raw=False):
                stat.update(rec)
            if stat:
                # As before, the date is attached only when rows were read.
                stat['fielddate'] = day

            update_stat([stat], STAT_PATH)
        finally:
            # Always clean up the scratch tables, even when a step failed.
            for table in scratch_tables:
                yt.remove(table, recursive=True, force=True)

# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
