from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import numpy as np
import time
import json
from scipy.spatial.distance import pdist, cdist, squareform
from qb2.api.v1 import resources
from functools import partial
def rename_event(x):
    """Collapse every search-related event name into the label 'search'.

    The value is returned unchanged unless its string form contains the
    substring 'search'.
    """
    return 'search' if 'search' in str(x) else x
def sec_delta(x, y):
    """Return the number of seconds between two timestamps (``x - y``).

    Both arguments are converted with ``str()`` and parsed using the
    format ``'%Y-%m-%d %H:%M:%S'``.  The result is a float and is negative
    when ``x`` is earlier than ``y``.  ``None`` is returned when either
    value does not match the format (for example ``None`` or ``'null'``).
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (TypeError, ValueError):
        # Only parse failures mean "no value".  A bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        return None
    return (later - earlier).total_seconds()
def get_app_name(x):
    """Translate an application identifier into its display name.

    The identifier is compared by its string form.  Any identifier that is
    not listed in the table falls back to 'YaBro iPhone'.
    """
    known_apps = {
        'ru.yandex.searchplugin': 'Search App Android',
        'id1050704155': 'Search App IOS',
        'com.yandex.browser': 'YaBro Android',
        'id574939428': 'YaBro iPad',
    }
    return known_apps.get(str(x), 'YaBro iPhone')
def get_start_of_previous_month(x):
    """Return the first day of the month preceding ``x`` as 'YYYY-MM-DD'.

    ``x`` must provide ``month``, ``year`` and ``replace`` (as date and
    datetime objects do).  January rolls back to December of the previous
    year.
    """
    if x.month == 1:
        target_year, target_month = x.year - 1, 12
    else:
        target_year, target_month = x.year, x.month - 1
    first_day = x.replace(day=1, month=target_month, year=target_year)
    # str() of a datetime carries a time part after a space; keep the date.
    return str(first_day).split(' ')[0]
def get_installs_from_appsflyer():
    """Build the per-install AppsFlyer table and write it to the cluster.

    Reads 'statbox/extdata-apps-flyer-log/@dates', keeps the install events
    that pass the filters below, derives normalised columns, left-joins two
    lookup tables, adds the delay between the ad click and the install, and
    stores the result in '$job_root/antifraud/weeks/installs'.

    Takes no arguments and returns None.  Relies on the module-level names
    ``cluster``, ``startdate`` and ``campaign_extended_name_split_symbol``,
    which must be defined before this function is called.
    """
    job = cluster.job()
    appsflyer_stream = job.table('statbox/extdata-apps-flyer-log/@dates')         .filter(
            # Drop rows whose campaign is the literal string 'null'.
            nf.custom(lambda x: x != 'null', 'campaign'),
            # Keep install events only.
            nf.custom(lambda x: x == 'install', 'event_type'),
            # Compared as-is against the 'YYYY-MM-DD' string in startdate;
            # presumably install_time is a 'YYYY-MM-DD HH:MM:SS' string so
            # the ordering is chronological -- TODO confirm.
            nf.custom(lambda x: x >= startdate, 'install_time'),
            # Require a non-empty device id.
            nf.custom(lambda x: x != '' and x != None, 'appsflyer_device_id'),
            # Restrict to the listed application ids (see get_app_name for
            # how they are mapped to display names).
            nf.custom(
                lambda x: x in ['ru.yandex.searchplugin', 'id1050704155', 'com.yandex.browser',
                                'id574939428', 'id483693909'],
                'app_id'
            ),
            # Keep a row when its media source is not 'organic'
            # (case-insensitive) or when an agency is set.
            nf.or_(
                nf.custom(lambda x: not str(x).lower() in ['organic'], 'media_source'),
                nf.custom(lambda x: x != 'null', 'agency')
            )
        ) \
        .project(
            'appsflyer_device_id', 'click_time', 'ip', 'os_version',
            # Use the agency as the media source when media_source is the
            # literal 'null'; the chosen value is lower-cased.
            media_source=ne.custom(
                lambda x, y: y.lower() if x == 'null' else x.lower(),
                'media_source', 'agency'
            ),
            platform=ne.custom(lambda x: str(x).lower(), 'platform'),
            campaign=ne.custom(lambda x: str(x).lower(), 'campaign'),
            city=ne.custom(lambda x: str(x).lower(), 'city'),
            device_brand=ne.custom(lambda x: str(x).lower(), 'device_brand'),
            device_model=ne.custom(lambda x: str(x).lower(), 'device_model'),
            install_date='install_time',
            event_time='event_time',
            # First four characters of the part after the first space.
            # NOTE(review): for an 'HH:MM:SS' time this yields 'HH:M', not
            # the hour alone -- confirm this is the intended bucket.
            day_hour=ne.custom(lambda x: str(x).split(' ')[1][:4], 'install_time'),
            country_code=ne.custom(lambda x: str(x).lower(), 'country_code'),
            app_name=ne.custom(get_app_name, 'app_id'),
            # Joins app_name, media_source and campaign with the module-level
            # separator.  NOTE(review): 'app_name' is produced by this same
            # project call; this presumably relies on the library resolving
            # dependencies between projected fields -- confirm.
            campaign_extended_name=ne.custom(
                lambda *args: campaign_extended_name_split_symbol.join(args),
                'app_name', 'media_source', 'campaign'
            ),
            carrier=ne.custom(lambda x: str(x).lower(), 'carrier'),
            operator=ne.custom(lambda x: str(x).lower(), 'operator'),
            # Region name looked up through the 'Geobase' resource; None when
            # the row has no ip.
            region_by_ip=ne.custom(
                lambda ip, geobase: None if ip is None else geobase.region_by_ip(ip).name,
                'ip', resources.resource('Geobase')
            ),
            app_version=ne.custom(lambda x: str(x).lower(), 'app_version'),
            wifi=ne.custom(lambda x: str(x).lower(), 'wifi')
        )

    # Left join: every install is kept even without a match in the lookup.
    appsflyer_stream = appsflyer_stream.join(
        job.table('home/turkey-analytics/ktereshin/appsflyer_dash/appsflyer_device_id_dist'),
        by='appsflyer_device_id',
        type='left'
    )
    # NOTE(review): 'uuid' is not among the fields projected above, so it
    # presumably comes from the table joined in the previous step -- confirm.
    appsflyer_stream = appsflyer_stream.join(
        job.table('$job_root/antifraud/did_puth_token_dict'),
        by_left='uuid',
        by_right='DeviceID',
        type='left'
    )
    # Keep all existing columns and add the seconds between the ad click and
    # the install, computed by sec_delta.
    appsflyer_stream = appsflyer_stream.project(
        ne.all(),
        sec_between_adv_click_and_app_launch=ne.custom(sec_delta, 'install_date', 'click_time')
    )
    appsflyer_stream = appsflyer_stream.put('$job_root/antifraud/weeks/installs')

    job.run()

# Reporting window: the most recent fully finished Monday..Sunday week.
# The clock is read once so that the date and its weekday come from the same
# instant; two separate now() calls could disagree around midnight.
_now = datetime.datetime.now()
# weekday() is 0 for Monday, so stepping back weekday() + 1 days lands on the
# last Sunday strictly before today.
enddate = _now - datetime.timedelta(days=_now.weekday() + 1)
# Step back from that Sunday to the Monday of the same week.
startdate = enddate - datetime.timedelta(days=enddate.weekday())
# Keep only the 'YYYY-MM-DD' part of each timestamp.
startdate = str(startdate).split(' ')[0]
enddate = str(enddate).split(' ')[0]
# Separator used when building campaign_extended_name.
campaign_extended_name_split_symbol = '__'
# Path templates shared by every job: the output root and the date range of
# the reporting window.
_job_templates = {
    'job_root': 'home/turkey-analytics/ktereshin',
    'dates': '{%s..%s}' % (startdate, enddate),
}
# NOTE(review): the access token is embedded in the source; consider moving
# it to a secret store or an environment variable.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(templates=_job_templates)

def count_installs_by_campaign(in_path, out_path):
    """Count installs per (app_name, media_source, campaign).

    Reads the table at ``in_path``, groups it by the three columns, stores
    the row count of each group in the ``installs`` column and writes the
    result to ``out_path``.  Uses the module-level ``cluster``.
    """
    job = cluster.job()
    installs_table = job.table(in_path)
    grouped = installs_table.groupby('app_name', 'media_source', 'campaign')
    appsflyer_stream = grouped.aggregate(installs=na.count()).put(out_path)
    job.run()
# Script entry point: report the reporting window, build the installs table,
# then aggregate it per campaign.
print('%s %s' % (startdate, enddate))
get_installs_from_appsflyer()
count_installs_by_campaign(
    '$job_root/antifraud/weeks/installs',
    '$job_root/antifraud/weeks/installs_by_campaign'
)
