#!/usr/bin/env python2.7
# coding=utf-8

from __future__ import division
from datetime import datetime, timedelta
import sys
from collections import defaultdict
import json
import urllib
import urllib2
import yt.wrapper as yt
from helpers import module_filter, update_stat, daterange


# Module-level YT client configuration; these assignments run at import time,
# before any map/reduce operation below is started.
# NOTE(review): TOKEN_PATH is assigned a one-element list rather than a plain
# string -- confirm this is the form the yt wrapper expects.
yt.config.TOKEN_PATH = ["/.yt/token"]
# Send all requests through the 'hahn' cluster proxy.
yt.config.set_proxy('hahn.yt.yandex.net')
# Presumably: create missing parent nodes for destination tables, and treat
# missing input tables as empty instead of failing -- verify against the
# yt wrapper documentation.
yt.config.CREATE_RECURSIVE = True
yt.config.TREAT_UNEXISTING_AS_EMPTY = True

# Disabled settings kept for reference. The last three lines repeat the active
# configuration above; the first two (MERGE_INSTEAD_WARNING and the
# module_filter hook) are currently not applied.
# yt.config.MERGE_INSTEAD_WARNING = 1
# yt.config.PYTHON_FUNCTION_MODULE_FILTER = module_filter
# yt.config.set_proxy('hahn.yt.yandex.net')
# yt.config.CREATE_RECURSIVE = True
# yt.config.TREAT_UNEXISTING_AS_EMPTY = True

def _filter(rec):
    splitstrs = (item.split('=', 1) for item in rec['value'].split('\t'))
    value = {name: value for name, value in splitstrs}

    try:
        date = rec['key'].split(' ')[-2]
        assert value.get('dtype') == 'stred'
        assert value.get('pid') == '12'
        assert value.get('cid') == '72873'
        assert value.get('path')
        assert value.get('yandexuid')
    except AssertionError:
        return

    path_values = value.get('path').split('.')
    if len(path_values) < 4:
        return

    if 'altsearchchrome' in path_values:
        return
    elems = path_values
    if len(elems) >= 4:
        browser = elems[0]
        version = elems[1]
        serp = elems[2]
        action = elems[3]

        if serp == 'bimg': serp = 'bing_img'
        if serp == 'bserp': serp = 'bing_web'
        if serp == 'bvid': serp = 'bing_video'
        if serp == 'gimg': serp = 'google_img'
        if serp == 'gserp': serp = 'google_web'
        if serp == 'gvid': serp = 'google_video'
        if serp == 'mimg': serp = 'mail_img'
        if serp == 'mserp': serp = 'mail_web'
        if serp == 'mvid': serp = 'mail_video'
        if serp == 'vkvid': serp = 'vk_video'
        if serp == 'yvid': serp = 'youtube_video'

        if browser == '': return
        if version == '': return
        if serp == '': return
        if not serp.isalpha: return
        if action == '': return
        if not action.isalpha: return
        if '%' in action: return
        if version == 'cfg': return

        metrics_all = [str(date),
                       str(browser),
                       str(version),
                       str(serp),
                       str(action)]

        key_all = '\t'.join(metrics_all)
        if len(key_all)>100:
            key_all = key_all[0:70]
        yield {'key_all': key_all}


def reduce(key, recs):
    """Count the rows that share one ``key_all`` value.

    Yields a single row whose only column is named after the key itself
    (``key['key_all']``) and whose value is the number of rows in ``recs``.
    """
    count = 0
    for _ in recs:
        count += 1
    column = key['key_all']
    yield {column: count}


def update_statface_report(rows):
    """POST ``rows`` to the Statface report data API.

    ``rows`` is a JSON-serialisable list of row dicts; it is sent as
    ``{'values': rows}`` in the ``data`` form field with daily scale ('d').

    Returns None. On an HTTP error the response body is printed and the
    ``urllib2.HTTPError`` is re-raised.
    """
    url = 'https://stat.yandex-team.ru/_api/report/data'
    # SECURITY NOTE(review): the robot credentials are hard-coded in the
    # source. They should be moved to a secret store or the environment and
    # rotated; left in place here so the script keeps working unchanged.
    headers = {'StatRobotUser': 'robot_robot-spok', 'StatRobotPassword': 'xeev0Tah3g'}
    values = {
        'name': 'https://stat.yandex-team.ru/Soft/Others/AltSearch',
        'scale': 'd',
        'data': json.dumps({'values': rows}),
    }
    req = urllib2.Request(url, urllib.urlencode(values), headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # Surface the server's explanation before propagating the failure.
        print(e.read())
        raise
    # The response body is not used; close the connection instead of
    # leaving it open until garbage collection.
    response.close()


def update_all(field_date, data):
    """Aggregate per-action counts for one day and upload them.

    ``data`` is an iterable of tab-separated lines of the form
    ``date, browser, version, serp, action, ..., count`` (the count is the
    last field). Only lines whose date equals ``field_date`` are used.

    Counts are summed per action at three levels of a tab-delimited path:
    browser, browser/version and browser/version/serp. One report row is
    built per path with the metrics show, close, set, yandex, google, mail
    and bing (0 when an action never occurred), and the rows are passed to
    ``update_statface_report``, whose result is returned.

    Raises ValueError if a count field is not an integer.
    """
    metrics = ('show', 'close', 'set', 'yandex', 'google', 'mail', 'bing')
    heads = {}
    for line in data:
        parts = line.split('\t')
        # Five key fields plus the count are required; shorter lines (for
        # example keys that were truncated upstream) cannot be attributed
        # to an action and are skipped rather than misparsed.
        if len(parts) < 6 or parts[0] != field_date:
            continue
        browser, ver, serp, action = parts[1:5]
        cnt = int(parts[-1])

        paths = (
            u'\t{}\t'.format(browser),
            u'\t{}\t{}\t'.format(browser, ver),
            u'\t{}\t{}\t{}\t'.format(browser, ver, serp),
        )
        for path in paths:
            if path not in heads:
                heads[path] = defaultdict(int)
            heads[path][action] += cnt

    upload_lst = []
    for path, counts in heads.items():
        row = {'fielddate': field_date, 'path': path}
        for metric in metrics:
            # .get avoids inserting missing actions into the defaultdict.
            row[metric] = counts.get(metric, 0)
        upload_lst.append(row)
    return update_statface_report(upload_lst)


def main(datefrom='2016-11-24', dateto='2016-11-29'):
    """Run the daily alt-search aggregation for every day in a date range.

    For each day produced by ``daterange(datefrom, dateto)`` the redir-log
    table of that day is mapped with ``_filter``, sorted, reduced with
    ``reduce`` into per-key counts, read back and uploaded via
    ``update_all``. Two scratch tables are recreated for every day and
    removed afterwards.

    ``datefrom`` / ``dateto`` are date strings in the form used for the
    table names (e.g. '2016-11-24'); the defaults keep the previously
    hard-coded range.
    """
    source_table_prefix = '//statbox/redir-log/'
    filtered = '//home/tr-analysts/kosotis/actions_serp_clicks_map_day'
    shows_per_user = '//home/tr-analysts/kosotis/actions_serp_clicks_reduce_day'
    scratch_tables = (filtered, shows_per_user)

    for day in daterange(datefrom, dateto):
        print(day)
        redirlog_table = source_table_prefix + day

        # Drop leftovers from a previous (possibly failed) run. force=True
        # ignores tables that do not exist, so each table is handled
        # independently and genuine errors are no longer swallowed.
        for table in scratch_tables:
            yt.remove(table, recursive=True, force=True)

        yt.run_map(_filter, source_table=redirlog_table, destination_table=filtered,
                   format=yt.DsvFormat())
        yt.run_sort(filtered, filtered, sort_by=['key_all'])
        yt.run_reduce(reduce, source_table=filtered, destination_table=shows_per_user,
                      format=yt.DsvFormat(), reduce_by=['key_all'])
        # NOTE(review): the reducer emits rows whose column name is the key
        # itself, so 'key_all' does not appear as a column here -- confirm
        # this sort is still needed.
        yt.run_sort(shows_per_user, shows_per_user, sort_by=['key_all'])

        # Each reduced row is {key: count}; flatten to 'key<TAB>count' lines.
        data = []
        for row in yt.read_table(shows_per_user, format='dsv', raw=False):
            for k, v in row.items():
                data.append(k + '\t' + v)

        update_all(day, data)

        for table in scratch_tables:
            yt.remove(table, recursive=True, force=True)


# Run the aggregation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
