#!/usr/bin/env python2.7
from __future__ import division
from datetime import datetime, timedelta
from collections import defaultdict
import os
import re
import json
import sys
import traceback
import argparse

try:
    import yt.wrapper as yt
    yt.config['token'] = 'AVImKQMAAAO3VDAg54V4QjeI8eaNZJSQfA'
except:
    print 'yt.wrapper not imported (probably script started locally)'

from helpers import country_by_clid, update_stat, daterange

__author__ = 'chikachoff'

# Path of the report and the full URL built from it; main() passes
# report_url to update_stat() and prints stat_url.
report_url = 'Yandex_RU/Special/Metrics/Switch/productname'
stat_url = 'https://stat.yandex-team.ru/' + report_url
# Relative path of the JSON file that main() loads as the channels grid.
channelsgrid_path = './channelsgrid.json'

# Each pattern is a fixed six-digit prefix followed by any four characters
# (the patterns are not anchored at the end).
# NOTE(review): presumably these match banner ids of main-page placements;
# confirm against the code that uses `morda`.
poloski = re.compile(r'010104....')
ssylka = re.compile(r'040104....')
popup = re.compile(r'610104....')
# Compiled pattern -> placement label.
morda = dict([
    (poloski, 'Morda.Poloski'),
    (ssylka, 'Morda.SsylkaMainpage'),
    (popup, 'Morda.Popup'),
])


def get_args():
    '''
    Parse the command-line options of the script.

    Both options are optional strings. argparse yields None for an option
    that was not supplied, and that None is returned unchanged, so callers
    must be prepared for it.
    See https://gist.github.com/redja/9276216 reference about argparse

    Returns:
        tuple: (startdate, enddate) -- the raw values of -s/--start and
        -e/--end.

    Raises:
        SystemExit: raised by argparse on unknown options or -h/--help.
    '''
    parser = argparse.ArgumentParser(
        description='Calc ltv metrics for given period of time and draw graph')
    parser.add_argument(
        '-s', '--start', type=str, help='Start date. Format: "YYYY-MM-DD"',
        required=False)
    parser.add_argument(
        '-e', '--end', type=str, help='End date. Format: "YYYY-MM-DD"',
        required=False)
    args = parser.parse_args()
    return args.start, args.end


def update_banerids():
    '''
    Run ``svn up`` on the local ``resources/`` directory via the shell.

    The exit status returned by os.system is discarded.
    '''
    command = 'svn up resources/'
    os.system(command)


def module_filter(module):
    '''
    Decide whether a module passes the filter.

    Returns True for a falsy ``module`` and for any module whose
    ``__name__`` is neither exactly 'uatraits' nor prefixed with 'statbox';
    returns False otherwise. A missing ``__name__`` is treated as ''.
    '''
    if module:
        module_name = getattr(module, '__name__', '')
        if module_name == 'uatraits':
            return False
        return not module_name.startswith('statbox')
    return True

# Module-level configuration of the YT client. It runs at import time and
# needs the `yt` import at the top of the file to have succeeded; otherwise
# the first line below raises NameError.
# NOTE(review): presumably asks YT to merge operation output -- confirm
# against the yt.wrapper configuration reference.
yt.config["auto_merge_output"]["action"] = "merge"
# Register module_filter (defined above) as the pickling module filter.
yt.config["pickling"]["module_filter"] = module_filter
yt.config["pickling"]["enable_tmpfs_archive"] = False
# Proxy host the client talks to.
yt.config.set_proxy('hahn.yt.yandex.net')
# NOTE(review): the two flags below look like legacy attribute-style
# options; their exact effect is not visible here -- confirm.
yt.config.CREATE_RECURSIVE = True
yt.config.TREAT_UNEXISTING_AS_EMPTY = True


def vars_to_dict(vars_value):
    '''
    Parse a comma-separated ``key=value`` string into a dict.

    Items without '=' are ignored. Each item is split on its first '=' only,
    so the value may itself contain '='. A single leading '-' is stripped
    from the key. When a key repeats, the last value wins.
    '''
    parsed = {}
    for chunk in vars_value.split(','):
        name, separator, value = chunk.partition('=')
        if not separator:
            continue
        if name.startswith('-'):
            name = name[1:]
        parsed[name] = value
    return parsed


class FullVarsMapper:
    '''
    Map operation callable: turn one input record into at most one flat row.

    ``rec['value']`` is read as a tab-separated list of ``key=value``
    fields. A row is emitted only when the record has both ``vars`` and
    ``unixtime`` fields, ``vars`` contains a comma, and the parsed ``vars``
    (see vars_to_dict) has a ``dayuse`` entry.

    Malformed input is skipped instead of failing the whole operation:
    fields without '=' are ignored (the same rule vars_to_dict applies to
    its items) and records whose ``unixtime`` is not an integer are dropped.
    '''
    def __call__(self, rec):
        '''
        Yield zero or one dict with keys date, dayuse, bnrd, productname
        and clid1 for the given record.
        '''
        data = {}
        for item in rec['value'].split('\t'):
            name, separator, value = item.partition('=')
            # A field with no '=' cannot be unpacked into key/value; skip it.
            if separator:
                data[name] = value

        if 'vars' not in data or 'unixtime' not in data:
            return

        # Only comma-separated vars are parsed; a single-item value is
        # treated as "nothing to report".
        if ',' not in data['vars']:
            return
        vars_data = vars_to_dict(data['vars'])

        if 'dayuse' not in vars_data:
            return

        try:
            unixtime = int(data['unixtime'])
        except ValueError:
            # Non-numeric timestamp: drop the record instead of crashing.
            return

        # fromtimestamp() without a tz argument converts to the local time
        # zone of the machine running the mapper.
        tstamp = datetime.fromtimestamp(unixtime)
        # Other available values (yandexuid from the record; bro, ver, ui
        # from vars; the raw vars string) are intentionally not emitted.
        yield {
            'date': tstamp.strftime('%Y-%m-%d'),
            'dayuse': vars_data['dayuse'],
            'bnrd': vars_data.get('bnrd', '----------'),
            'productname': vars_data.get('productname', ''),
            'clid1': vars_data.get('clid1', ''),
        }


class ProductnameReducer():
    '''
    Reduce operation callable: count audience and installs per productname.

    Only records whose country, as returned by
    country_by_clid(rec['clid1'], channelsgrid), equals 'ru' are counted.
    A record is a "switch" record when characters 2-3 of ``bnrd`` are '98'
    or '99'. Every counted record adds to the *_audience counter; records
    with dayuse == '0' also add to the matching *_installs counter.
    '''
    def __init__(self, channelsgrid):
        # Kept on the instance so it is available inside __call__.
        self.channelsgrid = channelsgrid

    def __call__(self, key, recs):
        '''
        Yield one dict per productname that has at least one counted record.

        Each dict holds the counters that were touched plus ``productname``
        and ``fielddate`` (taken from key['date']).
        '''
        counters = defaultdict(dict)
        for rec in recs:
            # Looking the product up first registers it even if the record
            # ends up not being counted; empty entries are filtered below.
            stats = counters[rec['productname']]
            is_switch = rec['bnrd'][2:4] in ['98', '99']
            country = country_by_clid(rec['clid1'], self.channelsgrid)
            if country != 'ru':
                continue

            if is_switch:
                audience_key, installs_key = (
                    'switch_audience', 'switch_installs')
            else:
                audience_key, installs_key = (
                    'nonswitch_audience', 'nonswitch_installs')

            stats[audience_key] = stats.get(audience_key, 0) + 1
            if rec['dayuse'] == '0':
                stats[installs_key] = stats.get(installs_key, 0) + 1

        for productname in counters:
            stats = counters[productname]
            if sum(stats.values()) > 0:
                row = dict(stats)
                row.update({'productname': productname,
                            'fielddate': key['date']})
                yield row


def main():
    startdate, enddate = get_args()
    with open(channelsgrid_path) as fp:
        channelsgrid = fp.read()
        channelsgrid = json.loads(channelsgrid)
    source_table_prefix = '//statbox/redir-log/'
    # today = datetime.today()
#     yesterday = (today - timedelta(days=1)).strftime('%Y-%m-%d')
#     try:
#         startdate = sys.argv[1]
#     except:
#         print 'Using default "datefrom"(yesterday). Debug traceback follows:'
#         print traceback.format_exc()
#         startdate = yesterday

#     try:
#         enddate = sys.argv[2]
#     except:
#         enddate = yesterday

    for day in daterange(startdate, enddate):
        print 'Day: ', day
        redirlog_table = source_table_prefix + day
        map_table = '//home/ranking/chikachoff/switch/productname_map'
        red_table = '//home/ranking/chikachoff/switch/productname_reduce'
        yt.run_erase(map_table)
        yt.run_erase(red_table)
        yt.run_map(
            FullVarsMapper(),
            source_table=redirlog_table,
            destination_table=map_table,
            format=yt.DsvFormat())
        yt.run_sort(
            source_table=map_table,
            destination_table=map_table,
            sort_by=['date'])
        yt.run_reduce(
            ProductnameReducer(channelsgrid=channelsgrid),
            source_table=map_table,
            destination_table=yt.TablePath(red_table),
            reduce_by=['date'],
            format=yt.DsvFormat(),
            local_files=[channelsgrid_path])
        data = [rec for rec in
                yt.read_table(red_table, format='dsv', raw=False)
                if rec['productname'] != '']
        print data
        update_stat(data, report_url)
        print 'Productname RU updated date: {}. Url: {}'.format(day, stat_url)

# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
