from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import requests
import numpy as np
import datetime
import re
import json
# Reporting window as 'YYYY-MM-DD' strings: a fixed first day and
# yesterday (local time) as the last day.
startdate = '2016-11-18'
enddate = (datetime.date.today() - datetime.timedelta(days=1)).isoformat()
def week_start(x):
    """Return the Monday of the week containing ``x`` as a 'YYYY-MM-DD' string.

    ``x`` is converted with ``str`` and parsed using the '%Y-%m-%d' format.
    Returns ``None`` when the value cannot be parsed (for example ``None``
    or a malformed string).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (TypeError, ValueError):
        # Unparseable values map to None instead of aborting the caller;
        # only parsing errors are caught so interrupts still propagate.
        return None
    # weekday() is 0 for Monday, so subtracting it lands on that week's Monday.
    return (day - datetime.timedelta(days=day.weekday())).date().isoformat()
def month_start(x):
    """Return the first day of the month containing ``x`` as 'YYYY-MM-DD'.

    ``x`` is converted with ``str`` and parsed using the '%Y-%m-%d' format.
    Returns ``None`` when the value cannot be parsed (for example ``None``
    or a malformed string).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (TypeError, ValueError):
        # Unparseable values map to None instead of aborting the caller;
        # only parsing errors are caught so interrupts still propagate.
        return None
    return day.replace(day=1).date().isoformat()
# NOTE(review): the default robot credentials are hard-coded in source; they
# are kept only so existing callers keep working and should be moved to a
# secret store or environment variables.
def load_data_to_stat(res_data, scale='d', path = 'Distribution/ktereshin/appsflyer_dash/user_quality_index', robot = 'robot_ktereshin', password = '3en5cayAppeif6l'):
    """Upload ``res_data`` to a Stat report, one POST request per ``fielddate``.

    Args:
        res_data: DataFrame with a ``fielddate`` column. The rows of each
            distinct ``fielddate`` are serialized as ';'-separated CSV
            (with header, without index) and sent in one request.
        scale: value sent as the ``scale`` form field.
        path: report name, sent as the ``name`` form field.
        robot: value of the ``StatRobotUser`` request header.
        password: value of the ``StatRobotPassword`` request header.

    Returns:
        None. Progress is printed; an upload answered with an HTTP error
        status is reported as failed and the remaining dates are still sent.
    """
    print('start load data for scale %s' % (scale))
    dates = list(res_data['fielddate'].unique())
    for date in dates:
        r_data = res_data[res_data['fielddate'] == date]
        # to_csv ends the output with a newline; strip that single trailing one.
        csv_data = re.sub(r'\n$', '', r_data.to_csv(sep=';', index=False))
        r = requests.post(
            'https://upload.stat.yandex-team.ru/_api/report/data',
            headers={'StatRobotUser': robot, 'StatRobotPassword': password},
            data={
                'name': path,
                'scale': scale,
                'data': csv_data,
            },
        )
        # Do not claim success when the server rejected the upload.
        if r.ok:
            print('Done for %s' % (date))
        else:
            print('Failed for %s: HTTP %s' % (date, r.status_code))
def prepare_data(data, scale='d'):
    """Aggregate install statistics per period, with 'total' roll-up rows.

    Args:
        data: DataFrame with ``install_date``, ``media_source``, ``campaign``,
            ``users`` and ``user_quality_index_sum`` columns. A ``fielddate``
            column is added to (or overwritten on) this frame in place.
        scale: 'd' uses ``install_date`` as ``fielddate``; 'w' maps it through
            ``week_start``; any other value maps it through ``month_start``.

    Returns:
        DataFrame with columns ``fielddate``, ``media_source``, ``campaign``,
        ``users``, ``user_quality_index_sum`` and ``user_quality_index_mean``
        (sum divided by users). Besides the per-(media_source, campaign) rows
        it contains roll-ups in which the collapsed dimension is 'total'.
    """
    keys = ['fielddate', 'media_source', 'campaign']
    measures = ['users', 'user_quality_index_sum']

    # The three scales differ only in how the period start is derived.
    if scale == 'd':
        data['fielddate'] = data['install_date']
    elif scale == 'w':
        data['fielddate'] = data['install_date'].apply(week_start)
    else:
        data['fielddate'] = data['install_date'].apply(month_start)

    groupings = (
        ['fielddate', 'media_source', 'campaign'],
        ['fielddate', 'media_source'],
        ['fielddate', 'campaign'],
        ['fielddate'],
    )
    frames = []
    for group_keys in groupings:
        part = data.groupby(group_keys)[measures].sum().reset_index()
        # A dimension that was collapsed by this grouping is labelled 'total'.
        for col in keys:
            if col not in group_keys:
                part[col] = 'total'
        # Same column order in every frame so concat needs no alignment.
        frames.append(part[keys + measures])

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    res_data = pd.concat(frames, ignore_index=True)
    # Re-group so that rows sharing the same key triple are merged into one.
    res_data = res_data.groupby(keys)[measures].sum().reset_index()
    res_data['user_quality_index_mean'] = res_data['user_quality_index_sum'] / res_data['users']
    return res_data
def get_user_quality(groups):
    """Reducer: yield one Record per group with its install flag and quality index.

    For every ``(key, records)`` pair, ``install`` is 1 if any record has
    ``event_type == 'install'`` (otherwise 0), and ``user_quality_index`` is
    the ``'value'`` entry, converted to float, of the JSON in ``event_value``
    of the last record whose ``event_name`` is 'user quality' (0 if none).
    The key fields are copied into the output record unchanged.
    """
    for key, records in groups:
        seen_install, quality = 0, 0
        for rec in records:
            if rec.event_type == 'install':
                seen_install = 1
            if rec.event_name == 'user quality':
                payload = json.loads(rec.event_value)
                quality = float(payload['value'])
        yield Record(
            appsflyer_device_id = key.appsflyer_device_id,
            media_source = key.media_source,
            campaign = key.campaign,
            install_date = key.install_date,
            user_quality_index = quality,
            install = seen_install
        )
def get_media_source(x,y):
    """Return ``x`` lower-cased, or ``y`` lower-cased when ``x`` is the string 'null'."""
    chosen = y if x == 'null' else x
    return chosen.lower()
# NOTE(review): the cluster token is hard-coded in source; it should be read
# from a secret store or the environment instead.
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Pipeline: keep com.yandex.browser install / 'user quality' events, reduce
# them to one record per (device, media_source, campaign, install_date), then
# aggregate per (media_source, campaign, install_date) and store the result.
appsfluer = job.table('statbox/extdata-apps-flyer-log/@dates') \
.filter(
    nf.custom(lambda x: x == 'com.yandex.browser', 'app_id'),
    nf.or_(
        nf.custom(lambda x: x == 'user quality', 'event_name'),
        nf.custom(lambda x: x == 'install', 'event_type')
    ),
    # Compare only the date part of install_time. Comparing the full
    # 'date time' string against the date-only enddate rejected every
    # install made on the last day, because 'YYYY-MM-DD hh:mm:ss' sorts
    # after 'YYYY-MM-DD'.
    nf.custom(lambda x: startdate <= str(x).split(' ')[0] <= enddate, 'install_time')
) \
.project(
    'appsflyer_device_id', 'event_name', 'event_type', 'event_value',
    media_source = ne.custom(get_media_source, 'media_source', 'agency'),
    campaign = ne.custom(lambda x: str(x).lower(), 'campaign'),
    install_date = ne.custom(lambda x: str(x).split(' ')[0], 'install_time')
) \
.groupby('appsflyer_device_id','media_source', 'campaign', 'install_date') \
.reduce(get_user_quality) \
.groupby('media_source', 'campaign', 'install_date') \
.aggregate(
    users = na.count(),
    user_quality_index_sum = na.sum('user_quality_index')
) \
.put('$job_root/appsflyer_dash/user_quality_index/user_quality_index')
job.run()
# Report definition sent to Stat: three dimensions, three measures, and
# selectors for media_source / campaign that default to the 'total' roll-up.
# The 'user_quality_index_mean' key previously had a stray TAB before the
# colon, which some YAML parsers reject; it has been removed.
yaml_config = u'''
---
allow_recalculate: "0"
autovalues_enabled: "0"
dimensions:
  - fielddate: date
  - media_source: string
  - campaign: string
measures:
  - users: number
  - user_quality_index_sum: number
  - user_quality_index_mean: number
view_types:
  media_source:
    type: Selector
    default: total
  campaign:
    type: Selector
    default: total
aggregate_uncomplete_period: "0"
'''
# NOTE(review): robot credentials are hard-coded here and in the upload call
# below; they should come from a secret store or the environment.
resp = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/config',
    headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
    data={
        'cube_config': yaml_config,
        'title': 'YaBro Android: User Quality Index',
        'name': 'Distribution/ktereshin/appsflyer_dash/user_quality_index'
    },
)
# Surface a rejected config upload instead of silently ignoring the response.
if not resp.ok:
    print('Report config upload failed: HTTP %s' % (resp.status_code))
job = cluster.job()
installs = job.table('$job_root/appsflyer_dash/user_quality_index/user_quality_index')
data = installs.read().as_dataframe()
# Upload the same aggregated table at daily, weekly and monthly scale.
for sc in ['d', 'w','m']:
    res_data = prepare_data(data, scale=sc)
    load_data_to_stat(res_data, scale=sc, path = 'Distribution/ktereshin/appsflyer_dash/user_quality_index', robot = 'robot_ktereshin', password = '3en5cayAppeif6l')
