from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import numpy as np
import re
import requests
# Date window as 'YYYY-MM-DD' strings: startdate is two days before now,
# enddate is the day after startdate.
startdate = datetime.datetime.now() - datetime.timedelta(days=2)
enddate = (startdate + datetime.timedelta(days=1)).date().isoformat()
startdate = startdate.date().isoformat()

# Cluster handle shared by every job below; '$job_root' in table paths is
# expanded from the templates given here.
# NOTE(review): the access token is hard-coded in source -- consider loading
# it from the environment or a secrets store instead.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(
    templates={
        'job_root': 'home/turkey-analytics/ktereshin',
        'dates': '{2016-07-19}',
    }
)
def platform(x, y):
    """Return ``y``, or a platform-specific default code when ``y`` is empty.

    ``y`` counts as empty when it is ``None`` or ``''``. In that case the
    lower-cased string form of ``x`` is searched for the substrings
    'window', 'android' and 'ios' (in that order) and the matching numeric
    code is returned. If ``y`` is not empty, or nothing matches, ``y`` is
    returned unchanged.

    NOTE(review): the meaning of the numeric codes is not visible in this
    file -- presumably identifiers of a per-platform default; confirm.
    """
    # Identity check for None instead of ``== None``.
    if y is not None and y != '':
        return y

    name = str(x).lower()
    for marker, code in (('window', 5746), ('android', 10321), ('ios', 42989)):
        if marker in name:
            return code
    return y
def dd(x):
    """Return ``x`` if it is an int or float, otherwise 0.

    Used to replace missing or non-numeric metric values with zero before
    aggregation. Subclasses of ``int``/``float`` (for example
    ``numpy.float64``) are accepted as numbers; the previous exact
    ``type(x) == ...`` comparison zeroed them. ``bool`` is still mapped
    to 0, as before.
    """
    if isinstance(x, bool):
        return 0
    if isinstance(x, (int, float)):
        return x
    return 0
# Job 1: count distinct device_id values per install month and attribution
# slice, and store the result as the "installs" report table.
job = cluster.job()

install_keys = ['fielddate', 'media_source', 'project', 'platform',
                'media_source_type', 'channel_type', 'country']

search = job.table('$job_root/searches_money/install_searches_money_new_old').project(
    'media_source', 'project', 'media_source_type', 'device_id', 'country',
    channel_type='install_channel_type',
    platform=ne.custom(lambda x: str(x).lower(), 'platform'),
    searches=ne.custom(dd, 'searches'),
    revenue=ne.custom(dd, 'revenue'),
    fielddate='install_month',
)

# First pass collapses the rows to one per (slice, device_id); the second
# pass counts those rows, i.e. the number of distinct devices per slice.
installs = (
    search
    .groupby(*(install_keys + ['device_id']))
    .aggregate(installs=na.count())
    .groupby(*install_keys)
    .aggregate(installs=na.count())
    .put('$job_root/searches_money/report/installs_search_money_new_user_cohort_by_months')
)
job.run()

# Job 2: per install month, attribution slice and months_delta, sum revenue
# and searches and count the devices that contributed rows.
job = cluster.job()

cohort_keys = ['media_source', 'project', 'platform', 'media_source_type',
               'channel_type', 'install_month', 'country', 'months_delta']

cohort = job.table('$job_root/searches_money/install_searches_money_new_old')
cohort = cohort.project(
    ne.all(),
    searches=ne.custom(dd, 'searches'),
    revenue=ne.custom(dd, 'revenue'),
)
# NOTE(review): rows are filtered on 'weeks_delta' while the grouping below
# uses 'months_delta' -- confirm this is intended.
cohort = cohort.filter(nf.custom(lambda x: x >= 0, 'weeks_delta'))

# Per-device totals first, so that the second aggregation can count devices.
cohort = cohort.groupby(*(cohort_keys + ['device_id'])).aggregate(
    revenue=na.sum('revenue'),
    searches=na.sum('searches'),
)
cohort = cohort.groupby(*cohort_keys).aggregate(
    revenue=na.sum('revenue'),
    searches=na.sum('searches'),
    searching_users=na.count(),
)

# The output column is called 'days_delta' but carries 'months_delta' values.
cohort = cohort.project(
    'media_source', 'project', 'platform', 'media_source_type', 'channel_type',
    'country', 'searches', 'searching_users', 'revenue',
    days_delta='months_delta',
    fielddate='install_month',
)
cohort = cohort.put('$job_root/searches_money/report/cohort_search_money_new_user_cohort_by_months')
job.run()

# Job 3: lower-case the string form of every dimension in both report
# tables, then left-join cohort metrics onto the install counts.
job = cluster.job()

text_fields = ['channel_type', 'country', 'fielddate', 'media_source',
               'media_source_type', 'platform', 'project']

installs = job.table('$job_root/searches_money/report/installs_search_money_new_user_cohort_by_months')
installs = installs.project(
    'installs',
    **{name: ne.custom(lambda x: str(x).lower(), name) for name in text_fields}
)

cohort = job.table('$job_root/searches_money/report/cohort_search_money_new_user_cohort_by_months')
cohort = cohort.project(
    'revenue', 'searches', 'searching_users', 'days_delta',
    **{name: ne.custom(lambda x: str(x).lower(), name) for name in text_fields}
)

# Left join: every install slice is kept even when no cohort row matches it.
join_keys = ['fielddate', 'media_source', 'project', 'platform',
             'media_source_type', 'country', 'channel_type']
installs = installs.join(cohort, by=join_keys, type='left')
installs = installs.put('$job_root/searches_money/report/search_money_new_user_cohort_by_months')
job.run()

# Load the joined report table into pandas for the roll-up step.
result_data = installs.read().as_dataframe()

metric_columns = ['installs', 'revenue', 'searches', 'searching_users']

# Missing metric values are treated as zero before summing.
for metric in metric_columns:
    result_data[metric] = result_data[metric].fillna(0)

# Dimensions present in every roll-up, plus each combination of the three
# optional dimensions (from full detail down to none of them).
base_dims = ['fielddate', 'project', 'platform', 'channel_type', 'days_delta']
optional_dim_sets = [
    ['media_source', 'media_source_type', 'country'],
    ['media_source', 'media_source_type'],
    ['media_source', 'country'],
    ['media_source_type', 'country'],
    ['media_source'],
    ['country'],
    ['media_source_type'],
    [],
]

# Build all roll-ups and stack them with one pd.concat call.
# DataFrame.append, used previously, is no longer available in pandas >= 2.0.
rollups = [
    result_data.groupby(base_dims + extra)[metric_columns].sum().reset_index()
    for extra in optional_dim_sets
]
res_data = pd.concat(rollups)

# A dimension that was aggregated away is absent from that roll-up and hence
# NaN after stacking; label it 'total'.
res_data = res_data.fillna('total')

# NOTE(review): each key combination is expected to occur once at this point,
# so mean() presumably just passes the values through -- confirm.
res_data = res_data.groupby(
    ['fielddate', 'project', 'channel_type', 'media_source_type',
     'days_delta', 'platform', 'media_source', 'country']
)[metric_columns].mean().reset_index()
def conv_money(x):
    """Convert a raw revenue value with ``float(x) * 30 / 1000000``.

    Returns 0 when ``x`` cannot be converted to a float (for example
    ``None`` or a non-numeric string).

    NOTE(review): the factors 30 and 1000000 are not explained in this
    file -- presumably a currency rate and a micro-unit divisor; confirm.
    """
    try:
        return float(x) * 30 / 1000000
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no revenue"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return 0
# Convert the aggregated revenue column with conv_money.
res_data['revenue'] = res_data['revenue'].apply(conv_money)

slice_keys = ['fielddate', 'project', 'channel_type', 'media_source_type',
              'platform', 'media_source', 'country']

# For each delta i: sum revenue and searches over all deltas <= i, restricted
# to the slices that have a row at exactly delta i.
cumulative_frames = []
for i in range(0, int(res_data['days_delta'].max()) + 1):
    temp = res_data[res_data['days_delta'] <= i]
    t = res_data[res_data['days_delta'] == i][slice_keys]
    temp = pd.merge(temp, t, on=slice_keys, how='inner')
    temp = temp.groupby(slice_keys)[['revenue', 'searches']].sum().reset_index()
    temp = temp.rename(columns={'revenue': 'cum_revenue', 'searches': 'cum_searches'})
    temp['days_delta'] = i
    cumulative_frames.append(temp)

# Stack once at the end; DataFrame.append is no longer available in
# pandas >= 2.0.
res = pd.concat(cumulative_frames) if cumulative_frames else pd.DataFrame()

res_data = pd.merge(res_data, res, on=slice_keys + ['days_delta'], how='left')

# Derived ratios as (new column, numerator, denominator). The order is kept
# because it determines the column order of the uploaded CSV.
ratio_specs = [
    ('retention_rate', 'searching_users', 'installs'),
    ('searches_per_search_users', 'searches', 'searching_users'),
    ('searches_per_installs', 'searches', 'installs'),
    ('revenue_per_search_users', 'revenue', 'searching_users'),
    ('revenue_per_installs', 'revenue', 'installs'),
    ('cum_revenue_per_search_users', 'cum_revenue', 'searching_users'),
    ('cum_revenue_per_installs', 'cum_revenue', 'installs'),
    ('cum_searches_per_search_users', 'cum_searches', 'searching_users'),
    ('cum_searches_per_installs', 'cum_searches', 'installs'),
]
for ratio_name, numerator, denominator in ratio_specs:
    res_data[ratio_name] = res_data[numerator] / res_data[denominator]

# Replace NaN with 0 in the base metrics and in every derived ratio.
# Previously 'cum_revenue_per_search_users' and 'cum_searches_per_installs'
# were skipped because two other columns were filled twice.
columns_to_fill = ['installs', 'searches', 'revenue', 'searching_users']
columns_to_fill += [spec[0] for spec in ratio_specs]
for column in columns_to_fill:
    res_data[column] = res_data[column].fillna(0)

# Serialize as ';'-separated CSV without the trailing newline.
csv_data = re.sub('\n$', '', res_data.to_csv(sep=';', index=False))

# Upload the report data.
# NOTE(review): the robot credentials are hard-coded in source, and the
# response `r` is never checked, so a failed upload goes unnoticed.
r = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/data',
    headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
    data={
        'name': 'Distribution/ktereshin/money_searches/search_money_new_user_cohort',
        'scale': 'm',
        'data': csv_data,
    },
)
