#!/usr/bin/env python2.7
from __future__ import division
from datetime import datetime, timedelta
import json
import urllib2
import urllib
import yt.wrapper as yt
from collections import defaultdict
from daily_audience_by_dayuse import RedirLogMapper as AudienceMapper
from daily_audience_by_dayuse import UniqueReducer as AudienceReducer

__author__ = 'chikachoff'

def update_stat(rows):
    """Upload daily rows to the Switch audience report on Stat.

    Sends a form-encoded POST to the Stat report-data API with the report
    name, the daily scale ('d') and ``rows`` JSON-encoded under ``values``.

    Args:
        rows: JSON-serialisable collection of report rows (``main`` passes
            the list of records read back from the reduce table).

    Raises:
        urllib2.HTTPError: re-raised after the error response body has been
            printed, so the caller still sees the failure.
    """
    url = 'https://stat.yandex-team.ru/_api/report/data'
    # FIXME(security): robot credentials are hard-coded in source. They are
    # kept as-is to preserve behaviour, but should be loaded from a secret
    # store / environment and rotated.
    headers = {'StatRobotUser': 'robot_alesten', 'StatRobotPassword': 'Wi1oowaiph'}
    values = {'name': 'Yandex_RU/Special/Metrics/Switch/audience',
              'scale': 'd',
              'data': json.dumps({'values': rows})}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data, headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # Show the server's explanation before propagating the failure.
        print(e.read())
        raise
    else:
        # The response body is not used; close it so the connection is
        # released instead of lingering until garbage collection.
        response.close()

def module_filter(module):
    """Return True unless ``module`` is ``uatraits`` or a ``statbox*`` module.

    Falsy values are always accepted.  Otherwise the decision is made on the
    object's ``__name__`` attribute (treated as '' when absent): the exact
    name 'uatraits' and any name starting with 'statbox' are rejected.
    """
    if module:
        module_name = getattr(module, '__name__', '')
        if module_name == 'uatraits':
            return False
        if module_name.startswith('statbox'):
            return False
    return True

# Global yt.wrapper client configuration; these statements run at import time.
# NOTE(review): presumably asks YT to merge operation output chunks
# automatically -- confirm against the yt.wrapper config documentation.
yt.config["auto_merge_output"]["action"] = "merge"
# Install module_filter (defined above) as the pickling module filter; it
# rejects 'uatraits' and any module whose name starts with 'statbox'.
yt.config["pickling"]["module_filter"] = module_filter
# Proxy host that the yt client in this script talks to.
yt.config.set_proxy('plato.yt.yandex.net')
# NOTE(review): judging by the flag names, these create missing parent nodes
# and treat missing input tables as empty -- confirm against the yt docs.
yt.config.CREATE_RECURSIVE = True
yt.config.TREAT_UNEXISTING_AS_EMPTY = True


def vars_to_dict(vars_value):
    """Parse a comma-separated ``key=value`` string into a dict.

    Chunks without '=' are ignored.  Each chunk is split on its first '=',
    a single leading '-' is stripped from the key, and later occurrences of
    a key overwrite earlier ones.
    """
    parsed = {}
    for chunk in vars_value.split(','):
        name, separator, value = chunk.partition('=')
        if not separator:
            # No '=' in this chunk: nothing to record.
            continue
        if name[:1] == '-':
            name = name[1:]
        parsed[name] = value
    return parsed


class FullVarsMapper:
    """Mapper that emits one row with the full set of Switch vars per record.

    The input record's ``value`` is a tab-separated list of ``key=value``
    fields.  A row is emitted only when the record has ``vars``,
    ``yandexuid`` and ``unixtime`` fields, ``vars`` contains a comma, and
    the parsed vars include ``dayuse``.
    """

    def __call__(self, rec):
        """Yield at most one output row for the log record ``rec``.

        Args:
            rec: dict with a ``value`` key holding the tab-separated line.

        Yields:
            dict with ``yandexuid``, ``date`` (``unixtime`` rendered as
            YYYY-MM-DD in the local timezone), ``dayuse``, ``bnrd``
            (default '----------'), ``bro``, ``productname``, ``ver``,
            ``ui``, ``clid1``/``clid5``/``clid6``/``clid9``/``clid10``
            (default '') and the raw ``vars`` string.
        """
        data = {}
        for item in rec['value'].split('\t'):
            # Skip fields without '=' (an empty value produces exactly one
            # such field) instead of failing on tuple unpacking.
            if '=' not in item:
                continue
            k, v = item.split('=', 1)
            data[k] = v

        if not all(name in data for name in ('vars', 'yandexuid', 'unixtime')):
            return
        if ',' not in data['vars']:
            return

        vars_data = vars_to_dict(data['vars'])
        if 'dayuse' not in vars_data:
            return

        yield {
            'yandexuid': data['yandexuid'],
            # ``datetime`` is the class imported via ``from datetime import
            # datetime``, so fromtimestamp is called on it directly;
            # ``datetime.datetime.fromtimestamp`` raised AttributeError.
            'date': datetime.fromtimestamp(int(data['unixtime'])).strftime('%Y-%m-%d'),
            'dayuse': vars_data['dayuse'],
            'bnrd': vars_data.get('bnrd', '----------'),
            'bro': vars_data.get('bro', ''),
            'productname': vars_data.get('productname', ''),
            'ver': vars_data.get('ver', ''),
            'ui': vars_data.get('ui', ''),
            'clid1': vars_data.get('clid1', ''),
            'clid5': vars_data.get('clid5', ''),
            'clid6': vars_data.get('clid6', ''),
            'clid9': vars_data.get('clid9', ''),
            'clid10': vars_data.get('clid10', ''),
            'vars': data['vars']}

class RedirLogMapper:
    """Mapper that emits one row per whitelisted clid found in a log record.

    The input record's ``value`` is a tab-separated list of ``key=value``
    fields.  Records are considered only when they have ``vars``,
    ``yandexuid`` and ``unixtime`` fields, ``vars`` contains a comma, and
    the parsed vars include ``dayuse``.  For each clid key in
    ``_CLID_WHITELISTS`` whose value is whitelisted, one row is emitted.
    """

    # (vars key, accepted values), checked in this order.  A record may match
    # several keys and then produces one row per matching key.
    _CLID_WHITELISTS = (
        ('clid1', frozenset(['2163765', '2189879', '2189880', '2224312', '2219044'])),
        ('clid5', frozenset(['2163430', '2187268', '2224313', '2219045'])),
        ('clid6', frozenset(['2196598', '2224314'])),
        ('clid9', frozenset(['2187644'])),
        ('clid10', frozenset(['2164776', '2220366', '2224315', '2224772'])),
    )

    def __call__(self, rec):
        """Yield output rows for the log record ``rec``.

        Args:
            rec: dict with a ``value`` key holding the tab-separated line.

        Yields:
            dict with ``yandexuid``, ``date`` (``unixtime`` rendered as
            YYYY-MM-DD in the local timezone), ``dayuse``, ``bnrd``
            (default '----------') and the single matching clid key.
        """
        data = {}
        for item in rec['value'].split('\t'):
            # Skip fields without '=' (an empty value produces exactly one
            # such field) instead of failing on tuple unpacking.
            if '=' not in item:
                continue
            k, v = item.split('=', 1)
            data[k] = v

        if 'vars' not in data or 'yandexuid' not in data or 'unixtime' not in data:
            return
        if ',' not in data['vars']:
            return

        # Comma-separated key=value pairs; one leading '-' is stripped from
        # each key and entries without '=' are ignored.
        vars_data = {}
        for var_item in data['vars'].split(','):
            if '=' in var_item:
                k, v = var_item.split('=', 1)
                if k.startswith('-'):
                    k = k[1:]
                vars_data[k] = v

        if 'dayuse' not in vars_data:
            return
        dayuse = vars_data['dayuse']
        bnrd = vars_data.get('bnrd', '----------')

        for clid_key, accepted in self._CLID_WHITELISTS:
            clid = vars_data.get(clid_key)
            if clid not in accepted:
                continue
            row = {
                'yandexuid': data['yandexuid'],
                # Converted only for matching records, so a malformed
                # unixtime on a non-matching record is still ignored.
                'date': datetime.fromtimestamp(int(data['unixtime'])).strftime('%Y-%m-%d'),
                'dayuse': dayuse,
                'bnrd': bnrd,
            }
            row[clid_key] = clid
            yield row


class UniqueReducer():
    """Reducer that tallies audience and install counts for one ``date`` key.

    A record counts towards the "Switch" totals when characters 2-3 of its
    ``bnrd`` are '99', otherwise towards the "NonSwitch" totals.  Within
    either group a record with ``dayuse == '0'`` is also counted as an
    install.  Every input record is counted; no de-duplication is done here.
    """
    def __call__(self, key, recs):
        """Yield a single summary row for all ``recs`` under ``key``."""
        switch_audience = 0
        switch_installs = 0
        other_audience = 0
        other_installs = 0

        for row in recs:
            is_switch = row['bnrd'][2:4] == '99'
            is_install = row['dayuse'] == '0'
            if is_switch:
                switch_audience += 1
                if is_install:
                    switch_installs += 1
            else:
                other_audience += 1
                if is_install:
                    other_installs += 1

        summary = {}
        summary['fielddate'] = key['date']
        summary['installsSwitch'] = switch_installs
        summary['audienceSwitch'] = switch_audience
        summary['installsNonSwitch'] = other_installs
        summary['audienceNonSwitch'] = other_audience
        yield summary


def daterange(start, end):
    """Yield every day from ``start`` to ``end`` inclusive as 'YYYY-MM-DD'.

    Both bounds are 'YYYY-MM-DD' strings.  Nothing is yielded when ``start``
    is later than ``end``.
    """
    day_format = '%Y-%m-%d'
    one_day = timedelta(days=1)
    last_day = datetime.strptime(end, day_format)
    cursor = datetime.strptime(start, day_format)
    while cursor <= last_day:
        yield cursor.strftime(day_format)
        cursor = cursor + one_day


def main():
    """Compute yesterday's Switch install/audience counters and upload them.

    For every day in the processed range (currently just yesterday, by the
    local clock) this:

    1. maps ``//statbox/redir-log/<day>`` with RedirLogMapper into
       ``.../switch/installs_map_<day>``;
    2. sorts that table by ``date`` in place;
    3. reduces it by ``date`` with UniqueReducer into
       ``.../switch/installs_reduce_<day>``;
    4. reads the reduced rows back, prints them and sends them to Stat via
       update_stat.
    """
    source_table_prefix = '//statbox/redir-log/'
    work_table_prefix = '//home/tr-analysts/chikachoff/switch/'

    yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')

    # NOTE: one-off recipes that previously lived here as commented-out code:
    #   * append RedirLogMapper output for each of the last 100 days to
    #     .../switch/audience_map;
    #   * map yesterday's log with FullVarsMapper into
    #     .../switch/audience_full_map, sort it by 'date', and reduce
    #     .../switch/audience_map with UniqueReducer into
    #     .../switch/audience_reduce;
    #   * yt.run_erase() of the per-day map/reduce tables before re-running.
    #
    # To backfill history, iterate over an explicit range instead, e.g.
    # daterange('2015-11-05', '2015-11-05').
    for day in daterange(yesterday, yesterday):
        source_table = source_table_prefix + day
        install_table = work_table_prefix + 'installs_map_' + day
        unique_install_table = work_table_prefix + 'installs_reduce_' + day

        yt.run_map(
            RedirLogMapper(),
            source_table=source_table,
            destination_table=yt.TablePath(install_table),
            format=yt.DsvFormat())
        # The reduce below groups by 'date', so the mapped table is sorted
        # by that column first.
        yt.run_sort(
            source_table=install_table,
            destination_table=install_table,
            sort_by=['date'])
        yt.run_reduce(
            UniqueReducer(),
            source_table=install_table,
            destination_table=yt.TablePath(unique_install_table),
            reduce_by=['date'],
            format=yt.DsvFormat())

        day_installs = list(yt.read_table(unique_install_table, format='dsv', raw=False))
        print(day_installs)
        update_stat(day_installs)
        print('stat updated: Installs')


if __name__ == '__main__':
    main()
