# coding: utf-8
from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import requests
import numpy as np
import datetime
import re
from dateutil.relativedelta import relativedelta
def d_delta(x, y):
    """Return the difference ``x - y`` in whole days for two timestamps.

    Both arguments are converted with ``str()`` and parsed with the format
    ``'%Y-%m-%d %H:%M:%S'``.

    NOTE(review): a second ``d_delta`` (date-only format, returns ``None`` on
    failure) is defined further down in this module and rebinds the name, so
    this variant is not the one in effect once the module has loaded.

    Args:
        x: The later timestamp (any object whose ``str()`` matches the format).
        y: The earlier timestamp.

    Returns:
        ``(x - y).days`` as an int, or ``0`` when either value cannot be
        parsed.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        # Only parsing problems are treated as "no delta"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return 0
    return (later - earlier).days
def parse_query(x, y):
    """Extract the ``af_search_string`` value from a serialized event payload.

    The payload is handled as plain text: double quotes are removed, the text
    is split on commas, and the first piece containing ``af_search_string`` is
    used. Because of the comma split, a value that itself contains a comma is
    still cut at that comma.

    Args:
        x: Payload text, or ``None``.
        y: Event name; only ``'af_search'`` and ``'first_search_event'`` are
            processed.

    Returns:
        The text after the first ``:`` of the matching piece with ``}``
        characters removed, or ``None`` when the event name does not match,
        the payload is ``None``, or no piece contains the key.
    """
    if y not in ('af_search', 'first_search_event') or x is None:
        return None
    for par in x.replace('"', '').split(','):
        if 'af_search_string' in par:
            # maxsplit=1 keeps colons that belong to the value itself
            # (e.g. a URL typed as the search query).
            return par.split(':', 1)[1].replace('}', '')
    return None
def parse_revenue(x, y):
    """Extract the ``af_revenue`` value from a serialized event payload.

    Only the events ``'af_search'`` and ``'first_search_event'`` are
    processed; for any other event name the payload is not touched and
    ``None`` is returned. The payload is stripped of double quotes and split
    on commas; the first piece containing ``af_revenue`` supplies the result:
    the text between its first and second ``:`` with ``}`` removed.
    Returns ``None`` when no piece contains the key.
    """
    if y not in ('af_search', 'first_search_event'):
        return None
    pieces = x.replace('"', '').split(',')
    match = next((piece for piece in pieces if 'af_revenue' in piece), None)
    if match is None:
        return None
    return match.split(':')[1].replace('}', '')
def parse_currency(x, y):
    """Extract the ``af_currency`` value from a serialized event payload.

    Only the events ``'af_search'`` and ``'first_search_event'`` are
    processed; for any other event name the payload is not touched and
    ``None`` is returned. The payload is stripped of double quotes and split
    on commas; the first piece containing ``af_currency`` supplies the result:
    the text between its first and second ``:`` with ``}`` removed.
    Returns ``None`` when no piece contains the key.
    """
    wanted_events = ('af_search', 'first_search_event')
    if y not in wanted_events:
        return None
    unquoted = x.replace('"', '')
    for piece in unquoted.split(','):
        if 'af_currency' not in piece:
            continue
        value = piece.split(':')[1]
        return value.replace('}', '')
    return None
def rename_event(x):
    """Collapse every event whose text contains ``'search'`` into ``'search'``.

    The check is done on ``str(x)``; any other value is returned unchanged
    (not stringified).
    """
    return 'search' if 'search' in str(x) else x
def media(x):
    """Classify a media source into a coarse traffic channel.

    The comparison is case-insensitive and done on ``str(x)``. Rules are
    checked in order, so a value containing both ``ya_`` and ``_int`` is
    reported as ``'Portal'``.

    Returns:
        ``'Unknown'`` for ``None``; ``'Organic'`` for ``'null'`` or
        ``'organic'``; ``'Portal'`` when the text contains ``'ya_'``;
        ``'Paid'`` when it contains ``'_int'``; otherwise ``'Other'``.
    """
    if x is None:
        return 'Unknown'
    source = str(x).lower()  # lower-case once instead of in every branch
    if source in ('null', 'organic'):
        return 'Organic'
    if 'ya_' in source:
        return 'Portal'
    if '_int' in source:
        return 'Paid'
    return 'Other'
def media1(x):
    """Normalise a media source name without bucketing it into channels.

    Returns:
        ``'Unknown'`` for ``None``; ``'Organic'`` when ``str(x)`` is
        ``'null'`` or ``'organic'`` in any letter case; otherwise the
        lower-cased ``str(x)``.
    """
    if x is None:
        return 'Unknown'
    source = str(x).lower()  # lower-case once instead of in every branch
    if source in ('null', 'organic'):
        return 'Organic'
    return source
def week_start(x):
    """Return the Monday of the week containing date ``x``.

    Args:
        x: A date whose ``str()`` has the form ``'YYYY-MM-DD'``.

    Returns:
        The Monday on or before ``x`` as ``'YYYY-MM-DD'``, or ``None`` when
        ``x`` cannot be parsed.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (ValueError, TypeError):
        # Unparseable input (including None) yields no week.
        return None
    # weekday() is 0 for Monday, so subtracting it lands on the week's Monday.
    monday = day - datetime.timedelta(days=day.weekday())
    return monday.date().isoformat()
def month_start(x):
    """Return the first day of the month containing date ``x``.

    Args:
        x: A date whose ``str()`` has the form ``'YYYY-MM-DD'``.

    Returns:
        The first day of that month as ``'YYYY-MM-DD'``, or ``None`` when
        ``x`` cannot be parsed.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (ValueError, TypeError):
        # Unparseable input (including None) yields no month.
        return None
    return day.replace(day=1).date().isoformat()
def install_day(x,y):
    """Label a record ``'New'`` when both days have the same text, else ``'Old'``.

    The two values are compared after ``str()`` conversion.
    """
    same_day = str(x) == str(y)
    return 'New' if same_day else 'Old'
def install_month(x,y):
    """Label a record ``'New'`` when both months have the same text, else ``'Old'``.

    The two values are compared after ``str()`` conversion.
    """
    same_month = str(x) == str(y)
    return 'New' if same_month else 'Old'
def install_week(x,y):
    """Label a record ``'New'`` when both weeks have the same text, else ``'Old'``.

    The two values are compared after ``str()`` conversion.
    """
    same_week = str(x) == str(y)
    return 'New' if same_week else 'Old'
def d_delta(x, y):
    """Return the number of days from date ``y`` to date ``x``.

    Both arguments are converted with ``str()`` and parsed as
    ``'YYYY-MM-DD'``.

    Returns:
        ``(x - y).days`` as an int (negative when ``x`` is before ``y``), or
        ``None`` when either value cannot be parsed.
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        # Missing or malformed dates give an unknown delta rather than a crash.
        return None
    return (later - earlier).days
def w_delta(x, y):
    """Return the number of whole weeks from date ``y`` to date ``x``.

    Both arguments are converted with ``str()`` and parsed as
    ``'YYYY-MM-DD'``. The day difference is floor-divided by 7. ``//`` is
    used so the result is the same on Python 2 (where ``/`` on ints already
    floors) and Python 3; a span of -1 day therefore gives -1.

    Returns:
        The floored week count as an int, or ``None`` when either value
        cannot be parsed.
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        # Missing or malformed dates give an unknown delta rather than a crash.
        return None
    return (later - earlier).days // 7
def m_delta(x, y):
    """Return the number of calendar months from date ``y`` to date ``x``.

    Both arguments are converted with ``str()`` and parsed as
    ``'YYYY-MM-DD'``. Only year and month take part in the calculation; the
    day of month is ignored.

    Returns:
        ``(x.year - y.year) * 12 + x.month - y.month`` as an int, or ``None``
        when either value cannot be parsed.
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        # Missing or malformed dates give an unknown delta rather than a crash.
        return None
    return (later.year - earlier.year) * 12 + later.month - earlier.month
def get_app_name(x):
    """Translate an application identifier into its display name.

    The lookup key is ``str(x)``. Every identifier that is not one of the
    four listed below, including ``None``, is reported as ``'YaBro iPhone'``.
    """
    known_apps = {
        'ru.yandex.searchplugin': 'Search App Android',
        'id1050704155': 'Search App IOS',
        'com.yandex.browser': 'YaBro Android',
        'id574939428': 'YaBro iPad',
    }
    return known_apps.get(str(x), 'YaBro iPhone')
def dd(x):
    """Return ``x`` when it is exactly an ``int`` or ``float``, otherwise ``0``.

    The check is on the exact type, so ``None``, strings and ``bool`` values
    all map to ``0``.
    """
    # Exact type match on purpose: subclasses such as bool are not accepted.
    return x if type(x) in (int, float) else 0
def check_date(df):
    """Return the day after the N-th month end counted from ``fielddate``.

    ``df`` is indexed by key (a dict or a DataFrame row both work) and must
    provide ``'fielddate'`` and ``'days_delta'``; ``days_delta`` is used as
    the number of month ends to step over.

    The month-end frequency is passed as an offset object instead of the
    ``'M'`` alias, which newer pandas releases deprecate.

    Returns:
        ``df['fielddate']`` unchanged when ``days_delta`` is 0; otherwise the
        date following the ``days_delta``-th month end on or after
        ``fielddate``, formatted ``'YYYY-MM-DD'``.
    """
    months = df['days_delta']
    if months == 0:
        return df['fielddate']
    month_ends = pd.date_range(df['fielddate'], periods=months,
                               freq=pd.offsets.MonthEnd())
    following_day = month_ends[months - 1] + pd.Timedelta(days=1)
    # str(Timestamp) is 'YYYY-MM-DD HH:MM:SS'; keep only the date part.
    return str(following_day).split(' ')[0]
def check_date_W(df):
    """Return the day after the N-th weekly boundary counted from ``fielddate``.

    ``df`` is indexed by key (a dict or a DataFrame row both work) and must
    provide ``'fielddate'`` and ``'days_delta'``; ``days_delta`` is used as
    the number of pandas ``'W'`` periods to step over.

    Returns:
        ``df['fielddate']`` unchanged when ``days_delta`` is 0; otherwise the
        date following the ``days_delta``-th ``'W'`` boundary on or after
        ``fielddate``, formatted ``'YYYY-MM-DD'``.
    """
    weeks = df['days_delta']
    if weeks == 0:
        return df['fielddate']
    week_marks = pd.date_range(df['fielddate'], periods=weeks, freq='W')
    # str(Timestamp) is 'YYYY-MM-DD HH:MM:SS'; keep only the date part.
    last_mark = str(week_marks[weeks - 1]).split(' ')[0]
    two_days = pd.date_range(last_mark, periods=2, freq='D')
    return str(two_days[1]).split(' ')[0]

# --- Stage: per-dimension install counts ------------------------------------
# Reporting window bounds. Both are reduced to 'YYYY-MM-DD' strings by cutting
# str(datetime) at the first space.
enddate = datetime.datetime.now() - datetime.timedelta(days=2)
startdate = datetime.datetime.now() - datetime.timedelta(days=180)
enddate = str(enddate).split(' ')[0]
startdate = str(startdate).split(' ')[0]
# NOTE(review): this literal overrides the 180-day start computed above, so
# that computation currently has no effect.
startdate = '2016-05-16'
# NOTE(review): the access token is hard-coded in source and repeated for every
# cluster object in this file; consider loading it from the environment.
# The `dates` template is defined, but none of the table paths in this file
# reference it.
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
appsfluer = job.table('$job_root/appsflyer_dash/appsflyer_installs')
# Group the raw install records by the dashboard dimensions and store
# na.count() per group as `installs`.
appsfluer = appsfluer     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date'
    )\
    .aggregate(
        installs = na.count()
    )
appsfluer = appsfluer.put('$job_root/appsflyer_dash/appsflyer_installs_for_dash')
job.run()


# In[ ]:

# --- Stage: join installs with per-device search counts ----------------------
# Output: $job_root/appsflyer_dash/install_searches_cohort
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# na.count() of search records per device and event date/month/week.
searches = job.table('$job_root/appsflyer_dash/searches')     .groupby(
        'appsflyer_device_id',
        'event_date',
        'event_month',
        'event_week'
    ) \
    .aggregate(
        searches=na.count()
    )
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs')
# Left join keeps installs that have no matching search records.
installs = installs     .join(
        searches,
        by='appsflyer_device_id',
        type='left'
    )
# Add install-to-event distances. d_delta here is the date-only variant (the
# later of the two definitions in this module). `searches` is passed through
# dd(), which turns anything that is not an int/float into 0.
installs = installs     .project(
        ne.all(),
        days_delta = ne.custom(d_delta, 'event_date', 'install_date'),
        weeks_delta = ne.custom(w_delta, 'event_week', 'install_week'),
        months_delta = ne.custom(m_delta, 'event_month', 'install_month'),
        install_weeks_delta = ne.custom(w_delta, 'event_date', 'install_date'),
        install_months_delta = ne.custom(m_delta, 'event_date', 'install_date'),
        searches = ne.custom(dd, 'searches')
    )
installs = installs.put('$job_root/appsflyer_dash/install_searches_cohort')
job.run()

# --- Stage: installs plus searching users/searches in the first 30 days ------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cr_for_dash
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Keep rows with 0 <= days_delta <= 30 and a positive search count.
searches = job.table('$job_root/appsflyer_dash/install_searches_cohort')     .filter(
        nf.custom(lambda x: x >=0 and x <= 30, 'days_delta'),
        nf.custom(lambda x: x >0, 'searches')
    )
# First pass: one row per device with its summed searches.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'appsflyer_device_id'
    ) \
    .aggregate(
        searches = na.sum('searches')
    )
# Second pass: na.count() over the per-device rows and the searches total for
# each dimension combination.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date'
    ) \
    .aggregate(
        searching_users_af_30_days=na.count(),
        searches_af_30_days = na.sum('searches')
    )
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs_for_dash')     .project(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'installs'
    )
# Left join keeps install groups that have no searching users.
installs = installs     .join(
        searches,
        by=['app_name','campaign', 'country_code','media_source', 'media_source_type', 'install_date'],
        type='left'
    )
installs = installs.put('$job_root/appsflyer_dash/appsflyer_installs_cr_for_dash')
job.run()

# --- Stage: daily cohort of searching users and searches ---------------------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cohort_for_dash
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Keep rows with days_delta >= 0 (no upper bound here) and a positive search
# count.
searches = job.table('$job_root/appsflyer_dash/install_searches_cohort')     .filter(
        nf.custom(lambda x: x >=0, 'days_delta'),
        nf.custom(lambda x: x >0, 'searches')
    )
# First pass: one row per device and days_delta with its summed searches.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'appsflyer_device_id',
        'days_delta'
    ) \
    .aggregate(
        searches = na.sum('searches')
    )
# Second pass: na.count() over the per-device rows and the searches total for
# each dimension combination and days_delta.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'days_delta'
    ) \
    .aggregate(
        searching_users_af=na.count(),
        searches_af = na.sum('searches')
    )
searches = searches.put('$job_root/appsflyer_dash/appsflyer_installs_cohort_for_dash')
job.run()

# --- Stage: attach a uuid to every install -----------------------------------
# Output: $job_root/appsflyer_dash/appsflyer_installs_uuid
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs')
# appsflyer_device_id -> lower-cased uuid.
uuid_adi = job.table('$job_root/appsflyer_dash/appsflyer_device_id_dist')     .project(
        'appsflyer_device_id',
        uuid = ne.custom(lambda x: str(x).lower(),'uuid')
    )
# Dictionary keyed by lower-cased device_id (exposed under the name `uuid` so
# it can be used as the join key below). Presumably the string-valued keywords
# copy the source columns `uuid` and `api_key` into `new_uuid` and `app_name`
# -- TODO confirm against the nile project() documentation.
did_uuid_dict = job.table('$job_root/did_uuid_dict/dict')     .project(
        uuid = ne.custom(lambda x: str(x).lower(),'device_id'),
        new_uuid = 'uuid',app_name = 'api_key'
    )
installs = installs     .join(
        uuid_adi,
        by='appsflyer_device_id', type='left'
    )
# Branch 1: 'Search App Android' installs with app_version below '5.2' keep the
# uuid obtained from the join above.
# NOTE(review): app_version is compared as a string, so the ordering is
# lexicographic ('10.0' < '5.2' and '5.10' < '5.2' are both True). Confirm this
# matches the version numbers that actually occur.
installs_uuid = installs     .filter(
        nf.custom(lambda x: x < '5.2', 'app_version'),
        nf.custom(lambda x: x == 'Search App Android', 'app_name')
    )
# Branch 2: 'Search App Android' from '5.2' on, plus the four other listed
# apps. Rows whose app_name matches neither branch are not carried over.
installs_did = installs.     filter(
        nf.or_(
            nf.and_(
                nf.custom(lambda x: x >='5.2', 'app_version'),
                nf.custom(lambda x: x == 'Search App Android', 'app_name')
            ),
            nf.custom(lambda x: x in ['Search App IOS', 'YaBro Android', 'YaBro iPad','YaBro iPhone'], 'app_name')
        )
    )
# For branch 2 the uuid column is replaced by new_uuid from the dictionary.
installs_did = installs_did     .join(
        did_uuid_dict,
        by=['uuid', 'app_name'],
        type='left'
    ) \
    .project(
        ne.all(),
        uuid = 'new_uuid'
    )
installs = job.concat(
    installs_uuid,
    installs_did
)
installs = installs.put('$job_root/appsflyer_dash/appsflyer_installs_uuid')
job.run()

# --- Stage: join installs with searches/revenue by uuid ----------------------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Only installs with a non-empty uuid can be matched.
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs_uuid')     .filter(
        nf.custom(lambda x: x != None and x != '', 'uuid')
    )
# Search/revenue records with non-empty uuid and date; uuid is lower-cased.
search = job.table('$job_root/searches_money/searches_money')     .filter(
        nf.custom(lambda x: x != None and x != '', 'uuid'),
        nf.custom(lambda x: x != None and x != '', 'date')
    ) \
    .project(
        'date',
        'searches',
        'revenue',
        uuid = ne.custom(lambda x: str(x).lower(),'uuid')
    )
# Left join keeps installs without search records; the first projection
# derives the day distance and the month/week start of the search date.
installs = installs     .join(
        search,
        by='uuid',
        type='left'
    ) \
    .project(
        ne.all(),
        days_delta = ne.custom(d_delta, 'date', 'install_date'),
        search_month = ne.custom(month_start, 'date'),
        search_week = ne.custom(week_start, 'date')
    )
# Second projection: week/month distances; searches and revenue are passed
# through dd(), which turns anything that is not an int/float into 0.
installs = installs     .project(
        ne.all(),
        weeks_delta = ne.custom(w_delta, 'search_week', 'install_week'),
        months_delta = ne.custom(m_delta, 'search_month', 'install_month'),
        install_weeks_delta = ne.custom(w_delta, 'date', 'install_date'),
        install_months_delta = ne.custom(m_delta, 'date', 'install_date'),
        searches = ne.custom(dd, 'searches'),
        revenue = ne.custom(dd, 'revenue')
    )
installs = installs.put('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue')
job.run()

# --- Stage: daily cohort of searches/revenue from the uuid-joined table ------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue_for_dash
# Reuses the `cluster` object created for the previous stage.
job = cluster.job()
# Keep rows with days_delta >= 0.
searches = job.table('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue')     .filter(
        nf.custom(lambda x: x >=0, 'days_delta')
    )
# First pass: one row per device and days_delta with summed searches/revenue.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'days_delta',
        'appsflyer_device_id'
    ) \
    .aggregate(
        searches = na.sum('searches'),
        revenue = na.sum('revenue')
    )
# Second pass: totals and na.count() over the per-device rows for each
# dimension combination and days_delta.
searches = searches     .groupby(
        'app_name',
        'campaign',
        'country_code',
        'media_source',
        'media_source_type',
        'install_date',
        'days_delta'
    ) \
    .aggregate(
        searches_cube = na.sum('searches'),
        revenue_cube = na.sum('revenue'),
        searching_users_cube=na.count()
    )
searches = searches.put('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue_for_dash')
job.run()


# --- Stage: monthly installs plus 30-day searching users/searches ------------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cr_for_dash_months
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Keep rows with 0 <= days_delta <= 30 (no `searches > 0` filter in this
# monthly variant).
searches = job.table('$job_root/appsflyer_dash/install_searches_cohort').filter(nf.custom(lambda x: x >=0 and x <= 30, 'days_delta'))
# First pass: one row per device and install_month with its summed searches.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'appsflyer_device_id').aggregate(searches = na.sum('searches'))
# Second pass: na.count() over the per-device rows and the searches total per
# dimension combination and install_month.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month').aggregate(searching_users_af_30_days=na.count(), searches_af_30_days = na.sum('searches'))
# Daily install counts are re-keyed to the first day of their month and summed.
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs_for_dash').project('app_name','campaign', 'country_code','media_source', 'media_source_type', 'installs', install_month=ne.custom(month_start,'install_date'))
installs = installs.groupby('app_name','campaign', 'country_code','media_source', 'media_source_type', 'install_month').aggregate(installs = na.sum('installs'))
# Left join keeps install groups that have no searching users.
installs = installs.join(searches, by=['app_name','campaign', 'country_code','media_source', 'media_source_type', 'install_month'], type='left')
installs = installs.put('$job_root/appsflyer_dash/appsflyer_installs_cr_for_dash_months')
job.run()

# --- Stage: monthly cohort of searching users and searches -------------------
# Output: $job_root/appsflyer_dash/appsflyer_installs_cohort_for_dash_months
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s..%s}' % (startdate, enddate)
    )
)
job = cluster.job()
# Keep rows with install_months_delta >= 0.
searches = job.table('$job_root/appsflyer_dash/install_searches_cohort').filter(nf.custom(lambda x: x >=0, 'install_months_delta'))
# The projection keeps the listed columns and exposes install_months_delta
# under the name months_delta.
searches = searches.project('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'appsflyer_device_id', 'searches' , months_delta = 'install_months_delta')
# First pass: one row per device and months_delta with its summed searches.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'appsflyer_device_id', 'months_delta').aggregate(searches = na.sum('searches'))
# Second pass: na.count() over the per-device rows and the searches total per
# dimension combination, install_month and months_delta.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'months_delta').aggregate(searching_users_af=na.count(), searches_af = na.sum('searches'))
searches = searches.put('$job_root/appsflyer_dash/appsflyer_installs_cohort_for_dash_months')
job.run()

# --- Stage: monthly cohort of searches/revenue from the uuid-joined table ----
# Output: $job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue_for_dash_months
# Reuses the `cluster` object created for the previous stage.
job = cluster.job()
# Keep rows with install_months_delta >= 0.
searches = job.table('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue').filter(nf.custom(lambda x: x >=0, 'install_months_delta'))
# The projection keeps the listed columns and exposes install_months_delta
# under the name months_delta.
searches = searches.project('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'appsflyer_device_id', 'revenue', 'searches', months_delta = 'install_months_delta')
# First pass: one row per device and months_delta with summed searches/revenue.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'appsflyer_device_id', 'months_delta')\
.aggregate(searches = na.sum('searches'), revenue = na.sum('revenue'))
# Second pass: totals and na.count() over the per-device rows per dimension
# combination, install_month and months_delta.
searches = searches.groupby('app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'months_delta')\
.aggregate(searches_cube = na.sum('searches'), revenue_cube = na.sum('revenue'), searching_users_cube=na.count())
searches = searches.put('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue_for_dash_months')
job.run()

# --- Stage: combine the three monthly tables into the report table -----------
# Output: $job_root/appsflyer_dash/report/metrics_months
# Reuses the `cluster` object created for an earlier stage.
job = cluster.job()
installs = job.table('$job_root/appsflyer_dash/appsflyer_installs_cr_for_dash_months')
installs_af_cohort = job.table('$job_root/appsflyer_dash/appsflyer_installs_cohort_for_dash_months')
installs_cube_cohort = job.table('$job_root/appsflyer_dash/appsflyer_installs_cube_searches_revenue_for_dash_months')
# Full join so that a (dimensions, months_delta) key present in only one of the
# two cohort tables is kept.
installs_af_cohort = installs_af_cohort.join(installs_cube_cohort,
                                            by=['app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month', 'months_delta'], type='full')
# The join key does not include months_delta, so an install group can match
# several cohort rows (one per months_delta).
installs = installs.join(installs_af_cohort, by=['app_name','campaign', 'country_code',
                              'media_source', 'media_source_type', 'install_month'], type='left')
installs = installs.put('$job_root/appsflyer_dash/report/metrics_months')
job.run()

# --- Load the monthly report table into pandas -------------------------------
_report_templates = dict(
    job_root='home/turkey-analytics/ktereshin',
    dates='{%s..%s}' % (startdate, enddate)
)
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23'
).env(templates=_report_templates)
job = cluster.job()
installs = job.table('$job_root/appsflyer_dash/report/metrics_months')
data = installs.read().as_dataframe()

# Expose the month columns under the names used by the rest of the script.
data['fielddate'] = data['install_month']
data['days_delta'] = data['months_delta']
data.drop(['install_month', 'months_delta'], axis=1, inplace=True)

# Missing values (left/full join misses) become 0 and every metric is cast to
# int, in the same column order as before.
for _column in [
        'installs',
        'revenue_cube',
        'searches_af',
        'searches_af_30_days',
        'searches_cube',
        'searching_users_af',
        'searching_users_af_30_days',
        'searching_users_cube',
        'days_delta',
]:
    data[[_column]] = data[[_column]].fillna(0).astype(int)
def cam(x):
    """Replace a missing campaign name with the literal string ``'null'``.

    Missing means ``None``, the empty string, or a float NaN (how pandas may
    represent a missing value in an object column). Without the NaN case such
    rows keep a NaN key and are silently left out of any later
    ``groupby`` that uses the campaign column.

    Returns:
        ``'null'`` for a missing value, otherwise ``x`` unchanged.
    """
    if x is None or x == '':
        return 'null'
    # NaN is the only float that is not equal to itself.
    if isinstance(x, float) and x != x:
        return 'null'
    return x
# --- Build a dense (install group x delta) grid ------------------------------
# Missing campaign names become the literal 'null'.
data['campaign'] = data['campaign'].apply(cam)

col = ['fielddate','app_name','days_delta','country_code', 'media_source', 'media_source_type', 'campaign']
_cohort_metrics = ['revenue_cube', 'searches_af', 'searches_af_30_days', 'searches_cube',
                   'searching_users_af', 'searching_users_af_30_days', 'searching_users_cube']

# Install counts are taken from the delta-0 row of every group.
installs = data.loc[data['days_delta'] == 0, col + ['installs']]
cohort = data[col + _cohort_metrics]

# Repeat every install row once per delta in 0..max so that each group gets a
# row for every delta, including deltas without cohort data. Built with a
# single concat of copies instead of mutating a slice and appending in a loop.
_max_delta = int(cohort['days_delta'].max())
res = pd.concat([installs.assign(days_delta=_delta) for _delta in range(_max_delta + 1)])

# Attach the cohort metrics; grid cells without data get NaN and are zeroed
# below.
data = pd.merge(res, cohort, on=col, how='left')

for _name in ['installs'] + _cohort_metrics + ['days_delta']:
    data[_name] = data[_name].fillna(0).astype(int)
# --- Roll the metrics up over every subset of the optional dimensions --------
import itertools

_base_keys = ['fielddate', 'app_name', 'days_delta']
_optional_dims = ['country_code', 'media_source', 'media_source_type', 'campaign']
_metrics = ['installs', 'revenue_cube', 'searches_af', 'searches_af_30_days', 'searches_cube',
            'searching_users_af', 'searching_users_af_30_days', 'searching_users_cube']

# One aggregate per subset of the four optional dimensions (16 in total, from
# all four down to none). A dimension that is left out of a subset is absent
# from that aggregate and therefore NaN after the concat.
_rollups = []
for _size in range(len(_optional_dims), -1, -1):
    for _dims in itertools.combinations(_optional_dims, _size):
        _keys = _base_keys + list(_dims)
        _rollups.append(data.groupby(_keys)[_metrics].sum().reset_index())
res_data = pd.concat(_rollups)

# The NaN left by an omitted dimension is labelled 'total'.
res_data = res_data.fillna('total')
# Re-aggregate so every (keys) combination appears exactly once, sorted by key.
res_data = res_data.groupby(_base_keys + _optional_dims)[_metrics].sum().reset_index()
# NOTE(review): revenue is scaled by 30 / 1000000; the meaning of these two
# constants is not visible in this file -- confirm and name them.
res_data['revenue_cube'] = res_data['revenue_cube'].astype(float) * 30 / 1000000

# --- Cumulative metrics per group over increasing delta ----------------------
# For every (group, days_delta) row, cum_<metric> is the sum of <metric> over
# the rows of the same group with a days_delta less than or equal to its own.
# A grouped cumulative sum over delta-ordered rows gives this directly, without
# re-filtering, merging and re-grouping the whole frame once per delta.
# Float totals may differ from a per-delta re-summation in the last bits.
_group_keys = ['fielddate', 'app_name', 'country_code', 'media_source', 'media_source_type', 'campaign']
_cum_sources = ['revenue_cube', 'searches_af', 'searches_cube']

_ordered = res_data.sort_values(_group_keys + ['days_delta'])
res = _ordered.groupby(_group_keys)[_cum_sources].cumsum()
res.columns = ['cum_' + _name for _name in _cum_sources]

# Aligns on the row index, so the original row order of res_data is kept.
res_data = res_data.join(res)
res_data['cum_revenue_cube'] = res_data['cum_revenue_cube'].fillna(0)
res_data['cum_searches_af'] = res_data['cum_searches_af'].fillna(0)
res_data['cum_searches_cube'] = res_data['cum_searches_cube'].fillna(0)

# --- Derived ratio metrics ---------------------------------------------------
# (new column, numerator column, denominator column), in output column order.
_ratio_specs = [
    ('searches_per_users_af', 'searches_af', 'searching_users_af'),
    ('searches_per_install_af', 'searches_af', 'installs'),
    ('searches_per_users_cube', 'searches_cube', 'searching_users_cube'),
    ('searches_per_install_cube', 'searches_cube', 'installs'),
    ('rev_per_search_users_cube', 'revenue_cube', 'searching_users_cube'),
    ('rev_per_install_cube', 'revenue_cube', 'installs'),
    ('cum_searches_per_install_af', 'cum_searches_af', 'installs'),
    ('cum_searches_per_install_cube', 'cum_searches_cube', 'installs'),
    ('cum_rev_per_install_cube', 'cum_revenue_cube', 'installs'),
    ('cr_install_searches', 'searching_users_af_30_days', 'installs'),
    ('retention_rate_af', 'searching_users_af', 'installs'),
    ('retention_rate_cube', 'searching_users_cube', 'installs'),
]
for _ratio, _numerator, _denominator in _ratio_specs:
    # 0/0 yields NaN, which is reported as 0.
    res_data[_ratio] = (res_data[_numerator] / res_data[_denominator]).fillna(0)

# x/0 yields +inf or -inf; both are reported as 0. The previous code replaced
# np.inf twice and never handled -np.inf (presumably a copy-paste slip).
res_data.replace([np.inf, -np.inf], 0, inplace = True)
# --- Drop cohort cells that lie after the latest install month ---------------
# Each row's calendar month is fielddate shifted forward by days_delta months
# (days_delta holds a month count at this point). Rows whose shifted month is
# later than the latest fielddate in the frame are removed. Dates are
# 'YYYY-MM-DD' strings, so string comparison orders them chronologically.
# Local names are used here so the module-level function `dd` is no longer
# rebound to a list.
_shifted = []
for _field_date, _months in zip(res_data['fielddate'], res_data['days_delta']):
    if _months == 0:
        _shifted.append(_field_date)
    else:
        _moved = datetime.datetime.strptime(_field_date, "%Y-%m-%d") + relativedelta(months=int(_months))
        _shifted.append(str(_moved).split(' ')[0])
_latest = res_data['fielddate'].max()
_keep = pd.Series(_shifted, index=res_data.index, dtype=object) <= _latest
res_data = res_data[_keep]
# --- Upload the report, one request per fielddate ----------------------------
# print(...) with a single argument behaves the same on Python 2 and 3.
print('start load data')
dates = list(res_data['fielddate'].unique())
_failed_dates = []
for date in dates:
    r_data = res_data[res_data['fielddate'] == date]
    # to_csv ends with a newline; strip exactly that one trailing newline.
    csv_data = re.sub('\n$', '', r_data.to_csv(sep=';', index=False))
    # NOTE(review): the robot credentials are hard-coded in source; consider
    # loading them from the environment or a secrets store.
    r = requests.post(
        'https://upload.stat.yandex-team.ru/_api/report/data',
        headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
        data={
            'name': 'Distribution/ktereshin/appsflyer_dash/appsflyer_dash_ver1',
            'scale': 'm',
            'data': csv_data,
        },
    )
    # The response used to be ignored, so a rejected upload was still reported
    # as done. Keep uploading the remaining dates, but record the failure.
    if r.ok:
        print('Done for %s' % (date))
    else:
        _failed_dates.append(date)
        print('Failed for %s: HTTP %s' % (date, r.status_code))
if _failed_dates:
    raise RuntimeError('Upload failed for %d date(s): %s'
                       % (len(_failed_dates), ', '.join(str(d) for d in _failed_dates)))
