#!/usr/bin/env python2.7
from __future__ import division
import datetime
import json
import urllib2
import urllib
import yt.wrapper as yt

__author__ = 'chikachoff'

def update_stat(rows):
    """Upload report rows to the Stat report-data endpoint.

    The rows are JSON-encoded under the ``values`` key and POSTed as
    form data, together with the report name and the ``d`` scale.

    Args:
        rows: JSON-serializable sequence of row dicts to upload.

    Raises:
        urllib2.HTTPError: re-raised after the body of the error response
            has been printed.
    """
    url = 'https://stat.yandex-team.ru/_api/report/data'
    # NOTE(review): robot credentials are hard-coded in source control;
    # they should be loaded from a secret store / environment and rotated.
    headers = {'StatRobotUser': 'robot_alesten', 'StatRobotPassword': 'Wi1oowaiph'}
    values = {'name': 'Yandex_RU/Special/Metrics/Switch/audience',
              'scale': 'd',
              'data': json.dumps({'values': rows})}
    # Passing a data payload makes urllib2 issue a POST request.
    request = urllib2.Request(url, urllib.urlencode(values), headers)
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as e:
        # Show the body of the error response before propagating the failure.
        print(e.read())
        raise
    # The response body is not used; close it so the connection is released
    # instead of lingering until garbage collection.
    response.close()

def module_filter(module):
    """Return False for the ``uatraits`` and ``statbox*`` modules, True otherwise.

    Falsy values (e.g. ``None``) and objects without a ``__name__``
    attribute are accepted (True).
    """
    if module:
        module_name = getattr(module, '__name__', '')
        if module_name == 'uatraits':
            return False
        return not module_name.startswith('statbox')
    return True

# Global yt.wrapper client configuration, applied as a side effect of
# importing/running this module.
# NOTE(review): the exact semantics of each option are defined by yt.wrapper
# and are not visible in this file -- confirm against the YT documentation.
yt.config["auto_merge_output"]["action"] = "merge"
# Register module_filter (defined above) as the pickling module filter;
# presumably it keeps `uatraits` and `statbox*` modules out of the code
# shipped to operations -- TODO confirm.
yt.config["pickling"]["module_filter"] = module_filter
# Cluster proxy used for all subsequent yt calls.
yt.config.set_proxy('plato.yt.yandex.net')
yt.config.CREATE_RECURSIVE = True
yt.config.TREAT_UNEXISTING_AS_EMPTY = True


class RedirLogMapper:
    """Mapper that selects redir-log records by ``vars`` content.

    ``rec['value']`` is parsed as tab-separated ``key=value`` fields.  A
    record is considered only when it has ``vars``, ``yandexuid`` and
    ``unixtime`` fields.  ``vars`` is parsed as comma-separated
    ``key=value`` items (a leading ``-`` on a key is stripped).  Records
    whose ``dayuse`` is ``'0'`` and whose ``bnrd[2:4]`` is ``'99'`` produce
    one output row per matching entry of ``TRACKED_CLIDS``.
    """

    # (vars key, accepted values) pairs, checked in this order.  A record
    # yields one row for every pair whose key is present in ``vars`` with one
    # of the accepted values, so a single record may yield several rows.
    TRACKED_CLIDS = (
        ('clid1', ('2163765', '2189879', '2189880', '2224312', '2219044')),
        ('clid5', ('2163430', '2187268', '2224313', '2219045')),
        ('clid6', ('2196598', '2224314')),
        ('clid9', ('2187644',)),
        ('clid10', ('2164776', '2220366', '2224315', '2224772')),
    )

    def _iter_pairs(self, text, separator):
        """Yield ``(key, value)`` for each ``key=value`` item of *text*.

        Items are taken in order; items without ``=`` are skipped instead of
        raising, so one malformed field does not fail the whole record.
        """
        for item in text.split(separator):
            if '=' in item:
                key, value = item.split('=', 1)
                yield key, value

    def __call__(self, rec):
        """Yield output rows for one input record.

        Args:
            rec: mapping with a ``'value'`` string of tab-separated fields.

        Yields:
            dicts with ``yandexuid``, ``date`` (``YYYY-MM-DD``), ``dayuse``,
            ``bnrd`` and the matched ``clidN`` key.

        Raises:
            ValueError: if ``unixtime`` of a matching record is not an integer.
        """
        # Later duplicates of a field override earlier ones.
        data = dict(self._iter_pairs(rec['value'], '\t'))
        if not ('vars' in data and 'yandexuid' in data and 'unixtime' in data):
            return

        vars_data = {}
        for key, value in self._iter_pairs(data['vars'], ','):
            if key.startswith('-'):
                key = key[1:]
            vars_data[key] = value

        if 'dayuse' not in vars_data or 'bnrd' not in vars_data:
            return
        dayuse = vars_data['dayuse']
        bnrd = vars_data['bnrd']
        if dayuse != '0' or bnrd[2:4] != '99':
            return

        for clid_key, accepted in self.TRACKED_CLIDS:
            clid = vars_data.get(clid_key)
            if clid in accepted:
                yield {
                    'yandexuid': data['yandexuid'],
                    # fromtimestamp() converts using the local timezone of
                    # the machine that runs the mapper.
                    'date': datetime.datetime.fromtimestamp(
                        int(data['unixtime'])).strftime('%Y-%m-%d'),
                    'dayuse': dayuse,
                    'bnrd': bnrd,
                    clid_key: clid,
                }


class UniqueReducer():
    """Reducer that emits one row per key with the number of grouped records."""

    def __call__(self, key, recs):
        """Yield ``{'fielddate': key['date'], 'installs': <record count>}``.

        NOTE(review): despite the class name, every record in the group is
        counted; no de-duplication (e.g. by yandexuid) is performed --
        confirm this is the intended metric.
        """
        record_count = 0
        for _ in recs:
            record_count += 1
        yield {'fielddate': key['date'], 'installs': record_count}


def main():
    """Reduce the mapped table to per-date counts and upload them to Stat.

    Only the reduce and upload stages are active; the map and sort stages
    that fill ``install_table`` are kept below as disabled code.
    """
    source_table_prefix = '//statbox/redir-log/'
    install_table = '//home/tr-analysts/chikachoff/temp/dayuse0mapped'
    unique_install_table = '//home/tr-analysts/chikachoff/temp/dayuse0reduced'

    base = datetime.datetime.today()

    # --- Disabled: backfill the mapped table with the last 30 days ---
    # datelist = [(base - datetime.timedelta(days=x)).strftime('%Y-%m-%d') for x in range(0, 30)]
    # for ymd in datelist:
    #     source_table = source_table_prefix + ymd
    #     yt.run_map(
    #         RedirLogMapper(),
    #         source_table=source_table,
    #         destination_table=yt.TablePath(install_table, append=True),
    #         format=yt.DsvFormat())

    # --- Disabled: append yesterday's data to the mapped table, then sort ---
    # yesterday = (base - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # source_table = source_table_prefix + yesterday
    # yt.run_map(
    #     RedirLogMapper(),
    #     source_table=source_table,
    #     destination_table=yt.TablePath(install_table, append=True),
    #     format=yt.DsvFormat())
    #
    # yt.run_sort(
    #     source_table=install_table,
    #     destination_table=install_table,
    #     sort_by=['date'])

    # Collapse the mapped rows into one row per date.
    yt.run_reduce(
        UniqueReducer(),
        source_table=install_table,
        destination_table=yt.TablePath(unique_install_table),
        reduce_by=['date'],
        format=yt.DsvFormat())

    # Materialize the reduced rows so they can be printed and uploaded.
    day_installs = list(yt.read_table(unique_install_table, format='dsv', raw=False))
    print(day_installs)

    update_stat(day_installs)
    print('stat updated: Installs')
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
