from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import requests
import numpy as np
import datetime

def parse_query(x, y):
    """Extract the ``af_search_string`` value from a search event payload.

    x -- raw event value, a JSON-like string such as
         '{"af_search_string":"pizza","af_revenue":"1"}'.
    y -- event name; only 'af_search' and 'first_search_event' are parsed.

    Returns the value with double quotes and closing braces removed, or
    None when the event is not a search event, the payload is None, or no
    ``af_search_string`` parameter is present.

    The payload is split on commas, so a value that itself contains a comma
    is cut at that comma.
    """
    if y not in ('af_search', 'first_search_event') or x is None:
        return None
    for par in x.replace('"', '').split(','):
        if 'af_search_string' not in par:
            continue
        # Split on the FIRST colon only, so a query such as "time: 5" or a
        # URL keeps everything after the key instead of being truncated.
        _, sep, value = par.partition(':')
        if sep:
            return value.replace('}', '')
        # A fragment without a colon carries no value; keep scanning.
    return None
def parse_revenue(x, y):
    """Extract the ``af_revenue`` value from a search event payload.

    x -- raw event value, a JSON-like string such as
         '{"af_revenue":"1.5","af_currency":"USD"}'.
    y -- event name; only 'af_search' and 'first_search_event' are parsed.

    Returns the value as a string with double quotes and closing braces
    removed, or None when the event is not a search event, the payload is
    None, or no ``af_revenue`` parameter is present.
    """
    if y not in ('af_search', 'first_search_event') or x is None:
        return None
    for par in x.replace('"', '').split(','):
        if 'af_revenue' not in par:
            continue
        # Everything after the first colon is the value; a fragment without
        # a colon has no value and is skipped instead of raising IndexError.
        _, sep, value = par.partition(':')
        if sep:
            return value.replace('}', '')
    return None
def parse_currency(x, y):
    """Extract the ``af_currency`` value from a search event payload.

    x -- raw event value, a JSON-like string such as
         '{"af_revenue":"1.5","af_currency":"USD"}'.
    y -- event name; only 'af_search' and 'first_search_event' are parsed.

    Returns the value with double quotes and closing braces removed, or
    None when the event is not a search event, the payload is None, or no
    ``af_currency`` parameter is present.
    """
    if y not in ('af_search', 'first_search_event') or x is None:
        return None
    for par in x.replace('"', '').split(','):
        if 'af_currency' not in par:
            continue
        # Everything after the first colon is the value; a fragment without
        # a colon has no value and is skipped instead of raising IndexError.
        _, sep, value = par.partition(':')
        if sep:
            return value.replace('}', '')
    return None
def rename_event(x):
    """Collapse any event whose name contains 'search' into plain 'search'.

    The input is stringified only for the substring check; a value that
    does not match is returned unchanged (original object, original type).
    """
    return 'search' if 'search' in str(x) else x
def media(x):
    """Classify a media source into a coarse acquisition channel.

    Rules are checked in this order on the lower-cased string form of x:

    * None                                    -> 'Unknown'
    * exactly 'null'                          -> 'Organic_Null'
    * exactly 'organic'                       -> 'Organic'
    * contains 'ya_', 'yandex' or 'switch'    -> 'Portal'
    * contains '_int', 'cpa', 'cpi' or 'cpc'  -> 'Paid'
    * anything else                           -> 'Other'

    Because 'Portal' is tested before 'Paid', a source matching both groups
    (e.g. 'ya_cpa') is reported as 'Portal'.
    """
    if x is None:
        return 'Unknown'
    source = str(x).lower()
    if source == 'null':
        return 'Organic_Null'
    if source == 'organic':
        return 'Organic'
    if any(marker in source for marker in ('ya_', 'yandex', 'switch')):
        return 'Portal'
    if any(marker in source for marker in ('_int', 'cpa', 'cpi', 'cpc')):
        return 'Paid'
    return 'Other'
def media1(x):
    """Normalise a media source name for reporting.

    None becomes 'unknown'; 'organic' and 'null' (case-insensitive) become
    'Organic' and 'Organic_null'; every other value is returned as its
    lower-cased string form.
    """
    if x is None:
        return 'unknown'
    lowered = str(x).lower()
    # Only these two spellings get a dedicated label; the rest pass through.
    special = {'organic': 'Organic', 'null': 'Organic_null'}
    return special.get(lowered, lowered)
def week_start(x):
    """Return the Monday of the week containing date x.

    x -- a date in 'YYYY-MM-DD' form (stringified before parsing).

    Returns the Monday as a 'YYYY-MM-DD' string, or None when x cannot be
    parsed as such a date (including None and date-time strings).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except ValueError:
        # Only a parse failure means "no week"; anything else should surface.
        return None
    # weekday() is 0 for Monday, so subtracting it lands on the week's Monday.
    monday = day - datetime.timedelta(days=day.weekday())
    return monday.date().isoformat()
def month_start(x):
    """Return the first day of the month containing date x.

    x -- a date in 'YYYY-MM-DD' form (stringified before parsing).

    Returns a 'YYYY-MM-01' string, or None when x cannot be parsed as such
    a date (including None and date-time strings).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except ValueError:
        # Only a parse failure means "no month"; anything else should surface.
        return None
    return day.replace(day=1).date().isoformat()
def install_day(x, y):
    """Label a pair of values 'New' when their string forms match, else 'Old'.

    NOTE(review): presumably x and y are an event day and an install day;
    confirm against the callers.
    """
    return 'New' if str(x) == str(y) else 'Old'
def install_month(x, y):
    """Label a pair of values 'New' when their string forms match, else 'Old'.

    NOTE(review): presumably x and y are an event month and an install
    month; confirm against the callers.
    """
    same_month = str(x) == str(y)
    return 'New' if same_month else 'Old'
def install_week(x, y):
    """Label a pair of values 'New' when their string forms match, else 'Old'.

    NOTE(review): presumably x and y are an event week and an install week;
    confirm against the callers.
    """
    if str(x) != str(y):
        return 'Old'
    return 'New'
def d_delta(x, y):
    """Return the number of whole days from date y to date x.

    x, y -- dates in 'YYYY-MM-DD' form (stringified before parsing).

    The result is negative when x is earlier than y. Returns 0 when either
    value cannot be parsed as such a date.
    """
    try:
        later = datetime.datetime.strptime(str(x), '%Y-%m-%d')
        earlier = datetime.datetime.strptime(str(y), '%Y-%m-%d')
    except ValueError:
        # Unparseable input is treated as "no difference" (best effort).
        return 0
    return (later - earlier).days
def w_delta(x, y):
    """Return the number of whole weeks from date y to date x.

    x, y -- dates in 'YYYY-MM-DD' form (stringified before parsing).

    Returns 0 when either value cannot be parsed as such a date.
    """
    try:
        later = datetime.datetime.strptime(str(x), '%Y-%m-%d')
        earlier = datetime.datetime.strptime(str(y), '%Y-%m-%d')
    except ValueError:
        # Unparseable input is treated as "no difference" (best effort).
        return 0
    days = (later - earlier).days
    # NOTE(review): for negative deltas `/` floors on Python 2 but int()
    # truncates toward zero on Python 3 (-9 days -> -2 vs -1); the expression
    # is kept as is so results do not change on the current interpreter.
    return int(days / 7)
def m_delta(x, y):
    """Return the number of calendar months from date y to date x.

    x, y -- dates in 'YYYY-MM-DD' form (stringified before parsing).

    Only year and month are compared, so '2016-03-01' vs '2016-02-29' is 1.
    The result is negative when x falls in an earlier month than y. Returns
    0 when either value cannot be parsed as such a date.
    """
    try:
        later = datetime.datetime.strptime(str(x), '%Y-%m-%d')
        earlier = datetime.datetime.strptime(str(y), '%Y-%m-%d')
    except ValueError:
        # Unparseable input is treated as "no difference" (best effort).
        return 0
    return (later.year - earlier.year) * 12 + later.month - earlier.month
def get_app_name(x):
    """Map an application id to its human-readable name.

    Any id that is not one of the four listed below, including None, is
    reported as 'YaBro iPhone'.
    """
    names_by_id = {
        'ru.yandex.searchplugin': 'Search App Android',
        'id1050704155': 'Search App IOS',
        'com.yandex.browser': 'YaBro Android',
        'id574939428': 'YaBro iPad',
    }
    return names_by_id.get(str(x), 'YaBro iPhone')
def get_media_source(x, y):
    """Pick the effective media source for a row, lower-cased.

    x -- the row's media source.
    y -- the row's agency, used as a fallback.

    When x is exactly the string 'null' the agency is used instead, if one
    is present; otherwise x itself is used. A missing (None) media source is
    passed through as None rather than raising, and a 'null' media source
    with a missing agency stays 'null'.
    """
    if x == 'null' and y is not None:
        return y.lower()
    if x is None:
        # Nothing to lower-case; leave the gap visible to the caller.
        return None
    return x.lower()
# --- Job configuration -----------------------------------------------------
# The job reads the AppsFlyer log table for yesterday only.
enddate = (datetime.datetime.now() - datetime.timedelta(days=1)).date().isoformat()
# Lower bound for install_time. A rolling "now - 8 days" value used to be
# computed here but was always overwritten by this fixed date, so only the
# fixed value is kept.
# NOTE(review): confirm whether the fixed date or a rolling window is intended.
startdate = '2016-02-10'

# App ids whose installs are tracked.
_TRACKED_APP_IDS = (
    'ru.yandex.searchplugin',
    'id1050704155',
    'com.yandex.browser',
    'id574939428',
    'id483693909',
)


def _lower(x):
    """Return the lower-cased string form of a field (None becomes 'none')."""
    return str(x).lower()


# SECURITY NOTE(review): the access token is hard-coded in source. It should
# be rotated and loaded from a secret store or the environment instead.
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        job_root='home/turkey-analytics/ktereshin',
        dates='{%s}' % (enddate)
    )
)
job = cluster.job()

# --- Step 1: keep install events of the tracked apps and normalise fields ---
appsfluer = job.table('statbox/extdata-apps-flyer-log/@dates') \
    .filter(
        nf.custom(lambda x: x == 'install', 'event_type'),
        # The None guard keeps rows without install_time from raising on
        # Python 3, where None cannot be compared with a string.
        nf.custom(lambda x: x is not None and x >= startdate, 'install_time'),
        nf.custom(lambda x: x is not None and x != '', 'appsflyer_device_id'),
        nf.custom(lambda x: x in _TRACKED_APP_IDS, 'app_id')
    ) \
    .project(
        'appsflyer_device_id',
        'app_id',
        app_version = ne.custom(_lower, 'app_version'),
        media_source = ne.custom(get_media_source, 'media_source', 'agency'),
        platform = ne.custom(_lower, 'platform'),
        campaign = ne.custom(_lower, 'campaign'),
        city = ne.custom(_lower, 'city'),
        device_brand = ne.custom(_lower, 'device_brand'),
        device_model = ne.custom(_lower, 'device_model'),
        event_type = ne.custom(_lower, 'event_type'),
        event_name = ne.custom(_lower, 'event_name'),
        event_value = ne.custom(_lower, 'event_value'),
        af_keywords = ne.custom(_lower, 'af_keywords'),
        device_type = ne.custom(_lower, 'device_type'),
        # install_time looks like 'YYYY-MM-DD HH:MM:SS'; keep the date part.
        install_date = ne.custom(lambda x: str(x).split(' ')[0], 'install_time'),
        country_code = ne.custom(_lower, 'country_code'),
        app_name = ne.custom(get_app_name, 'app_id')
    )

# --- Step 2: one row per (app, device), taking the minimum of each attribute ---
# NOTE(review): columns projected above but not aggregated here (platform,
# event_*, af_keywords, device_type) are presumably dropped at this step.
appsfluer = appsfluer \
    .groupby(
        'app_name',
        'appsflyer_device_id'
    ) \
    .aggregate(
        app_version=na.min('app_version'),
        campaign=na.min('campaign'),
        city=na.min('city'),
        country_code=na.min('country_code'),
        device_brand=na.min('device_brand'),
        device_model=na.min('device_model'),
        media_source=na.min('media_source'),
        install_date=na.min('install_date')
    )

# --- Step 3: derive channel type, normalised source and install week/month ---
# The projections stay separate on purpose: media_source_type must be derived
# from media_source before media_source itself is rewritten by media1.
appsfluer = appsfluer \
    .project(
        ne.all(),
        media_source_type=ne.custom(media,'media_source')
    ) \
    .project(
        ne.all(),
        media_source=ne.custom(media1,'media_source')
    ) \
    .project(
        ne.all(),
        install_week = ne.custom(week_start, 'install_date'),
        install_month = ne.custom(month_start, 'install_date')
    ) \
    .put('$job_root/appsflyer_dash/appsflyer_installs', append = True)
# NOTE(review): append=True presumably means that re-running the job for the
# same date adds the same installs again; confirm this is acceptable.
job.run()
