# -*- coding: UTF-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)
from qb2.api.v1 import (
    extractors as se,
    filters as sf
)
import argparse
import datetime
import sys
import nile
import json
import time
import collections
import pandas as pd
import itertools

# Stat upload endpoint; used as the default ``api_url`` of ``post_to_stat``.
STAT_API = "https://upload.stat.yandex-team.ru/_api/report/data"


def parce_campain(campain):
    """Parse a ``key=value;key=value`` campaign string into a dict.

    Each ``;``-separated segment is split on its first ``=`` only, so a value
    that itself contains ``=`` is kept whole instead of being truncated.
    Empty segments (for example the one produced by a trailing ``;``) are
    skipped. If a key occurs more than once, the last occurrence wins.

    Args:
        campain: the raw campaign string.

    Returns:
        dict mapping each key to its string value.

    Raises:
        ValueError: if a non-empty segment contains no ``=``.
    """
    parsed = {}
    for segment in campain.split(";"):
        if not segment:
            continue
        key, sep, value = segment.partition("=")
        if not sep:
            raise ValueError(
                "malformed campaign segment: {!r}".format(segment))
        parsed[key] = value
    return parsed

def parse_response(rows):
    """Expand rows into one Record per campaign found in their JSON payload.

    For every row the JSON text in ``raw_event_value`` is decoded. If it has
    a ``campaigns`` mapping, one Record is yielded per entry: the entry key is
    parsed with ``parce_campain`` to obtain ``product``, ``bannerid`` and
    ``reqid``; the entry value is stringified into ``status``. ``app_id`` and
    ``api_level`` default to ``'empty'`` when absent from the payload.

    Processing is best-effort: any exception while handling a row stops that
    row (records already yielded for it are kept), is reported on stderr, and
    the next row is processed.
    """
    for row in rows:
        try:
            obj = json.loads(row["raw_event_value"])
            app_id = obj.get('app_id', 'empty')
            api_level = obj.get('api_level', 'empty')
            if 'campaigns' not in obj:
                continue
            for key, value in obj['campaigns'].items():
                # Parse the campaign key once rather than once per field.
                param = parce_campain(key)
                yield Record(product=param['product'],
                             bannerid=param['bannerid'],
                             reqid=param['reqid'],
                             status=str(value),
                             device_id=row['device_id'],
                             app_platform=row['app_platform'],
                             app_id=app_id,
                             api_level=api_level)
        except Exception as e:
            # Report the failure instead of silently discarding the row.
            sys.stderr.write('ERROR: {}\n'.format(str(e)))

def parse_dissomething(rows):
    """Yield one Record(bannerid, reqid, event_name) per parsable row.

    The JSON text in ``raw_event_value`` must contain ``campaign_id``, which
    is parsed with ``parce_campain`` to obtain ``bannerid`` and ``reqid``.
    ``event_name`` is copied from the row.

    Rows that fail at any step are skipped; the error is written to stderr,
    one message per line.
    """
    for row in rows:
        try:
            event_name = row['event_name']
            obj = json.loads(row["raw_event_value"])
            param = parce_campain(obj['campaign_id'])
            yield Record(bannerid=param['bannerid'],
                         reqid=param['reqid'],
                         event_name=event_name)
        except Exception as e:
            # Newline-terminated so consecutive errors do not run together.
            sys.stderr.write('ERROR: {}\n'.format(str(e)))

def new_status(rows):
    """Refine the ``status`` of successful rows with their event name.

    Rows whose ``status`` is ``"success"`` are re-emitted as a new Record with
    ``status`` set to ``"success_<event_name>"``; ``"empty"`` is used when the
    row has no ``event_name``. All other rows are yielded unchanged.

    Rows that raise while being processed are skipped and the error is
    written to stderr, one message per line.
    """
    for row in rows:
        try:
            if row['status'] == "success":
                # Copy the field mapping so the input record is not mutated.
                out = dict(vars(row))
                out['status'] = row['status'] + "_" + row.get("event_name", "empty")
                yield Record(**out)
            else:
                yield row
        except Exception as e:
            sys.stderr.write('ERROR: {}\n'.format(str(e)))

def reducer(groups):
    """Emit one Record per group flagging which statuses occurred in it.

    The output Record extends the group key with ``count=1``, a field set to
    1 for every distinct ``status`` seen in the group, and ``only_none``
    (1 when ``'none'`` is the only status seen, otherwise 0).
    """
    for key, records in groups:
        seen = {record['status'] for record in records}
        flags = dict.fromkeys(seen, 1)
        only_none = int(seen == {'none'})
        yield Record(key, only_none=only_none, count=1, **flags)

def reducer2(groups):
    """Sum every non-key field across a group, one output Record per field.

    All fields except ``device_id``, ``product`` and ``app_platform`` are
    accumulated. For each accumulated field a Record is yielded that extends
    the group key with ``status`` (the field name) and ``sum`` (its total).
    """
    skipped = ('device_id', 'product', 'app_platform')
    for key, records in groups:
        totals = collections.Counter()
        for record in records:
            for field in vars(record):
                if field in skipped:
                    continue
                totals[field] += record[field]
        for field, total in totals.items():
            yield Record(key, status=field, sum=total)

def dict_to_str(smh):
    """Turn a stringified one-item dict into ``"<key>_<value>"``.

    The input is expected to look like the ``str()`` of a dict (for example
    ``"{u'code': 5}"``); its quotes are rewritten so it can be decoded as
    JSON, and the first item is formatted as ``key_value``.

    Anything that cannot be handled this way (plain status strings, empty
    dicts, non-string values) is returned unchanged.
    """
    try:
        parsed = json.loads(smh.replace("u'", '"').replace("'", '"'))
        # next(iter(...)) works whether items() is a list or a view.
        key, value = next(iter(parsed.items()))
        return "{}_{}".format(key, value)
    except Exception:
        return smh

def get_stat_login(path='/home/zaringleb/.stat_login'):
    """Return the first line of the credentials file, stripped of whitespace.

    Args:
        path: file to read; defaults to the original hard-coded location.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    with open(path) as f:
        return f.readline().strip()

# NOTE(review): this constant is already assigned near the top of the file
# with the identical value; this second assignment is redundant.
STAT_API = "https://upload.stat.yandex-team.ru/_api/report/data"

def post_to_stat(df, path, api_url=STAT_API):
    """Upload a DataFrame as a daily-scale report and return the response.

    The frame is serialised as tab-separated text without the index and sent
    as form data together with the report name.

    Args:
        df: pandas DataFrame to upload.
        path: report name, sent as the ``name`` form field.
        api_url: upload endpoint; defaults to ``STAT_API``.

    Returns:
        The ``requests`` response object. Its status code is printed but not
        checked, so callers must inspect it to detect a failed upload.
    """
    import requests
    headers = {
        'StatRobotUser': 'robot_zaringleb',
        'StatRobotPassword': get_stat_login()
    }
    data = {
        'scale': 'd',
        'name': path,
        'tsv_data': df.to_csv(sep='\t', index=False)
    }
    # NOTE(review): no timeout is passed, so a stalled upload blocks forever.
    resp = requests.post(api_url, headers=headers, data=data)
    # Call form prints the same text under Python 2 and Python 3.
    print(resp.status_code)
    return resp

def process_date(date):
    """Build the per-day promoliba tables for ``date`` and upload two reports.

    Runs a sequence of jobs on the Hahn cluster that start from
    ``statbox/metrika-mobile-log/<date>`` and write intermediate tables under
    ``//home/atom/zaringleb/promoliba_response/``. The aggregated tables are
    then loaded into pandas, extended with ``_total_`` roll-up rows and
    posted with ``post_to_stat``.

    ``date`` is interpolated into every table path and stored in the
    ``fielddate`` column; ``main`` passes either the ``--date`` argument or
    yesterday's date in ISO format.
    """
    cluster = clusters.Hahn(pool = 'search-research_zaringleb')
    # Step 1: keep only display/dismiss/response events with api_key 86701
    # from the raw log and store them in campaings_<date>.
    job = cluster.job()
    log = job.table('statbox/metrika-mobile-log/{}'.format(date))
    banners = log.qb2(log = 'metrika-mobile-log',
                          fields = ["raw_event_value", 'device_id', 'event_name', 'app_platform', 'api_key'],
                          filters = [sf.one_of('event_name', ['display', 'dismiss', 'response']),
                                     sf.equals('api_key', 86701)]) \
                        .put("//home/atom/zaringleb/promoliba_response/campaings_{}".format(date))
    job.run()

    # Step 2: expand 'response' events into one record per campaign
    # (see parse_response).
    job = cluster.job()
    log = job.table("//home/atom/zaringleb/promoliba_response/campaings_{}".format(date)) \
            .filter(nf.equals('event_name', 'response')) \
            .map(parse_response) \
            .put("//home/atom/zaringleb/promoliba_response/campaings_response_{}".format(date))
    job.run()

    # Step 3: extract bannerid/reqid/event_name from 'display' and 'dismiss'
    # events (see parse_dissomething).
    job = cluster.job()
    log = job.table("//home/atom/zaringleb/promoliba_response/campaings_{}".format(date)) \
            .filter(sf.one_of('event_name', ['display', 'dismiss'])) \
            .map(parse_dissomething) \
            .put("//home/atom/zaringleb/promoliba_response/campaings_dissomething_{}".format(date))
    job.run()

    # Step 4: left-join responses with display/dismiss events on
    # (reqid, bannerid); new_status then appends the event name to
    # "success" statuses.
    job = cluster.job()
    dissomething = job.table("//home/atom/zaringleb/promoliba_response/campaings_dissomething_{}".format(date))
    table = job.table("//home/atom/zaringleb/promoliba_response/campaings_response_{}".format(date)) \
                        .join(dissomething, by=["reqid", "bannerid"], type='left') \
                        .map(new_status) \
                        .put("//home/atom/zaringleb/promoliba_response/campaings_joined_{}".format(date))
    job.run()

    # Step 5: count joined records per
    # (status, product, app_platform, app_id, api_level).
    job = cluster.job()
    log = job.table("//home/atom/zaringleb/promoliba_response/campaings_joined_{}".format(date)) \
            .groupby('status', 'product', 'app_platform', 'app_id', 'api_level') \
            .aggregate(count=na.count()) \
            .put("//home/atom/zaringleb/promoliba_response/campaings_count_total_{}".format(date))
    job.run()

    # Step 6: collapse those counts to (status, product, app_platform);
    # this table feeds the status ratio below.
    job = cluster.job()
    log = job.table("//home/atom/zaringleb/promoliba_response/campaings_count_total_{}".format(date)) \
            .groupby('status', 'product', 'app_platform') \
            .aggregate(count=na.sum('count')) \
            .put("//home/atom/zaringleb/promoliba_response/campaings_grouped_total_{}".format(date))
    job.run()

    # Step 7: per (device_id, product, app_platform) build status flags
    # (reducer), then sum the flags per (product, app_platform) (reducer2);
    # this table feeds the per-device share below.
    job = cluster.job()
    log = job.table("//home/atom/zaringleb/promoliba_response/campaings_joined_{}".format(date)) \
            .groupby('device_id', 'product', 'app_platform') \
            .reduce(reducer) \
            .groupby('product', 'app_platform') \
            .reduce(reducer2) \
            .put("//home/atom/zaringleb/promoliba_response/campaings_grouped_{}".format(date))
    job.run()

    # Load the detailed counts (step 5 output) into pandas.
    job = cluster.job()
    records = job.table("//home/atom/zaringleb/promoliba_response/campaings_count_total_{}".format(date)).read()
    df_count = records.as_dataframe()
    df_count['status'] = df_count['status'].apply(dict_to_str)
    df_count['fielddate'] = date

    # Add "_total_" roll-up rows: for every non-empty subset `one` of the
    # dimensions, sum `count` over that subset (grouping by the remaining
    # dimensions) and label the summed-over columns "_total_".
    cls = ['api_level', 'app_id', 'app_platform', 'product', 'status']
    df_tmps = []
    for i in range(len(cls)):
        print(i)
        for one in itertools.combinations(cls, i+1):
            not_one = list(set(cls) - set(one))
            if not_one:
                df_tmp = df_count.groupby(not_one + ['fielddate'], as_index=False).agg({'count': sum})
            else:
                # `one` covers every dimension: build the single grand-total
                # row from the first row, keeping its fielddate.
                pass
                df_tmp = df_count.head(1).copy()
                df_tmp['count'] = df_count['count'].sum()
            for col in one:
                df_tmp[col] = "_total_"
            df_tmps.append(df_tmp)
    df_count = pd.concat([df_count] + df_tmps)
    # Upload the detailed counts together with their roll-ups.
    post_to_stat(df_count, path='Distribution/Others/promoliba_monitoring')

    # status_ratio: share of each status within its (product, app_platform),
    # with an extra app_platform == '_total_' roll-up per product.
    job = cluster.job()
    records = job.table("//home/atom/zaringleb/promoliba_response/campaings_grouped_total_{}".format(date)).read()
    df_ratio = records.as_dataframe()
    df_tmp = df_ratio.groupby(['product', 'status'], as_index=False).agg({"count" : sum})
    df_tmp['app_platform'] = '_total_'
    df_ratio = pd.concat([df_ratio, df_tmp])
    df_ratio_total = df_ratio.groupby(['product', 'app_platform'], as_index=False).agg({"count" : sum}).rename(columns={'count': 'count_total'})
    df_ratio = pd.merge(df_ratio, df_ratio_total, on=['product', 'app_platform'])
    df_ratio['status_ratio'] = df_ratio['count']*1./df_ratio['count_total']

    # is_status_ratio: for each status, its summed per-device flag divided by
    # count_real = (sum of 'count') - (sum of 'only_none'), i.e. the device
    # groups that had at least one status other than 'none'.
    job = cluster.job()
    records = job.table("//home/atom/zaringleb/promoliba_response/campaings_grouped_{}".format(date)).read()
    df = records.as_dataframe()
    df_tmp = df.groupby(['product', 'status'], as_index=False).agg({"sum" : sum})
    df_tmp['app_platform'] = '_total_'
    df = pd.concat([df, df_tmp])
    df_tmp = pd.merge(df.loc[df['status'] == 'count', ['product', 'app_platform', 'sum']].rename(columns={'sum': 'sum_count'}), df.loc[df['status'] == 'only_none', ['product', 'app_platform', 'sum']].rename(columns={'sum': 'only_none'}), on=['product', 'app_platform'])
    df_tmp['count_real'] = df_tmp['sum_count'] - df_tmp['only_none']
    df2 = pd.merge(df[df['status'].isin(['count', 'only_none']) == False], df_tmp[['product', 'app_platform', 'count_real']], on=['product', 'app_platform'])
    df2['is_status_ratio'] = df2['sum']*1./df2['count_real']

    # Combine both ratios into one table keyed by
    # (status, product, app_platform).
    df2 = pd.merge(df_ratio[['status', 'status_ratio', 'product', 'app_platform']], df2[['is_status_ratio', 'status', 'product', 'app_platform']], on=['status', 'product', 'app_platform'])
    df2['status'] = df2['status'].apply(dict_to_str)
    df2['fielddate'] = date

    # Upload the combined ratio table.
    post_to_stat(df2, path='Distribution/Others/promoliba_product_response')

def main():
    """Parse ``--date`` from the command line and run ``process_date``.

    When ``--date`` is missing or empty, yesterday's date (local time, ISO
    format) is used.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--date', action='store')
    date = arg_parser.parse_args().date
    if not date:
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        date = yesterday.date().isoformat()
    process_date(date)

# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()
