import datetime
import pickle

import numpy as np
import pandas as pd
from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)
from qb2.api.v1 import (
    filters as sf,
    extractors as se
)
# Reporting window: the most recent Monday..Sunday week that ended before today.
# The clock is read exactly once; reading it twice (once for the base timestamp and
# once for the weekday) can straddle midnight and shift the window by a day.
today = datetime.datetime.now()
# weekday() is 0 for Monday, so subtracting weekday() + 1 days always lands on the
# latest Sunday strictly before today.
last_sunday = today - datetime.timedelta(days=today.weekday() + 1)
# Walk back from that Sunday to the Monday of the same week.
week_monday = last_sunday - datetime.timedelta(days=last_sunday.weekday())
# Both bounds are exposed as 'YYYY-MM-DD' strings.
startdate = week_monday.date().isoformat()
enddate = last_sunday.date().isoformat()
# Path templates available to the job: '$job_root' is the working directory prefix and
# '@dates' expands to the '{start..end}' range built from the reporting window.
job_templates = {
    'job_root': 'home/turkey-analytics/ktereshin',
    'dates': '{%s..%s}' % (startdate, enddate),
}

# NOTE(review): the access token is hard-coded in source. It should be treated as
# exposed, rotated, and supplied from outside the repository (environment variable or
# a secrets store) -- confirm which mechanism the deployment supports before changing.
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23')
cluster = cluster.env(templates=job_templates)

# Every stream defined below is attached to this single job.
job = cluster.job()

# Device screen profiles taken from the mobile Metrika log for the reporting window.
metrika_log = job.table('statbox/metrika-mobile-log/@dates')

# Keep foreground-session events that carry a device id, an event name and a screen
# height, and whose EventDateTime is not earlier than the window start (compared
# against the 'YYYY-MM-DD' string in startdate).
foreground_events = metrika_log.filter(
    nf.custom(lambda device: device not in (None, ''), 'DeviceID'),
    nf.custom(lambda session: session == 'SESSION_FOREGROUND', 'SessionType'),
    nf.custom(lambda event: event, 'EventName'),
    nf.custom(lambda height: height, 'ScreenHeight'),
    nf.custom(lambda moment: moment >= startdate, 'EventDateTime'),
)

# Reduce each event to the screen/model fields plus lower-cased device id and platform.
device_profiles = foreground_events.project(
    'AppID', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'Manufacturer', 'Model',
    device_id=ne.custom(lambda raw: str(raw).lower(), 'DeviceID'),
    platform=ne.custom(lambda raw: str(raw).lower(), 'AppPlatform'),
)

# De-duplicate by device_id and store the result under the job root; the stored
# stream is reused by the joins further down.
metrika_screens = device_profiles.unique('device_id').put(
    '$job_root/antifraud/weeks/all_metrika_screens'
)

# Installs with a non-empty uuid; the uuid is lower-cased into device_id so it can be
# matched against the lower-cased device ids of the Metrika screen profiles.
installs_table = job.table('$job_root/antifraud/weeks/installs')
installs = installs_table.filter(
    nf.custom(lambda uid: uid, 'uuid'),
).project(
    'campaign_extended_name',
    device_id=ne.custom(lambda uid: uid.lower(), 'uuid'),
)

# Left join: every install is kept, with screen fields attached when the device is known.
join_1 = installs.join(metrika_screens, type='left', by='device_id')

# Number of devices observed for every (manufacturer, model, screen geometry, platform)
# combination.
models_freq = metrika_screens.groupby(
    'Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform',
).aggregate(
    freq=na.count(),
)

# Combinations seen on more than one device ...
repeated_profiles = models_freq.filter(nf.custom(lambda seen: seen > 1, 'freq'))

# ... grouped per model: "twins" are models that have more than one such combination,
# i.e. the same model reported with several different screen geometries.
twins = repeated_profiles.groupby('Manufacturer', 'Model', 'platform').aggregate(
    doubles=na.count(),
).filter(
    nf.custom(lambda variants: variants > 1, 'doubles'),
)

# Every screen combination (with its frequency) of the models flagged as twins.
# Named twin_profiles rather than `all` so the builtin all() is not shadowed for the
# rest of the module.
twin_profiles = twins.join(
    models_freq, by=['Manufacturer', 'Model', 'platform'], type='left',
).sort('Manufacturer', 'Model', 'platform')

# Distinct screen combinations observed on installs whose campaign name contains
# 'preinstall'. The name is checked for emptiness first: the installs stream is only
# filtered on uuid, so a null campaign name would otherwise raise on the `in` test.
preinstall_screen_size = join_1.filter(
    nf.custom(lambda name: bool(name) and 'preinstall' in name, 'campaign_extended_name'),
).project(
    'Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform',
).unique(
    'Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform',
)

# For each twin model, the single most frequent screen combination.
top = twin_profiles.groupby('Manufacturer', 'Model', 'platform').top(1, 'freq')

# Suspicious combinations: those of twin models that are neither the most frequent one
# nor seen on a preinstall campaign ('left_only' keeps rows with no match on the right).
bad = twin_profiles.join(
    top,
    by=['Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform'],
    type='left_only',
).join(
    preinstall_screen_size,
    by=['Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform'],
    type='left_only',
)

# Fields that identify one screen combination of one device model.
screen_profile = ('Manufacturer', 'Model', 'ScreenDPI', 'ScreenHeight', 'ScreenWidth', 'platform')

# Label the most frequent combinations as good and the suspicious ones as bad; both
# flags are always set so they can be summed directly later on.
top = top.project(*screen_profile, is_good=ne.const(1), is_bad=ne.const(0))
bad = bad.project(*screen_profile, is_bad=ne.const(1), is_good=ne.const(0))

# Single lookup stream holding both labelled sets.
concat = job.concat(top, bad)

# Attach the labels to every install; installs whose combination is in neither set
# come out of the left join without is_good / is_bad values.
join_2 = join_1.join(concat, type='left', by=list(screen_profile))

# Per-platform totals of good and bad labelled installs.
platform_totals = join_2.groupby('platform').aggregate(
    good=na.sum('is_good'),
    bad=na.sum('is_bad'),
)

# Per-platform baseline ("porog" is presumably the transliterated Russian word for
# "threshold"): the percentage of bad installs among labelled ones, or 100 when the
# good total is zero or missing.
porogs = platform_totals.project(
    'platform',
    porog_percent=ne.custom(
        lambda bad, good: 100. if not good else bad * 100. / (bad + good),
        'bad', 'good',
    ),
)

# Per-campaign totals: all installs, good-labelled and bad-labelled ones.
campaign_totals = join_2.groupby('campaign_extended_name').aggregate(
    all=na.count(),
    good=na.sum('is_good'),
    bad=na.sum('is_bad'),
)

# Only campaigns with more than one bad install are considered.
suspect_campaigns = campaign_totals.filter(
    nf.custom(lambda bad: bad and bad > 1, 'bad'),
)

# Share of bad installs among labelled ones, computed the same way as the platform baseline.
with_bad_share = suspect_campaigns.project(
    ne.all(),
    bad_percent=ne.custom(
        lambda bad, good: 100. if not good else bad * 100. / (bad + good),
        'bad', 'good',
    ),
)

# The platform is inferred from the campaign name: 'android' when the name mentions
# it (case-insensitively), otherwise 'ios'.
with_platform = with_bad_share.project(
    ne.all(),
    platform=ne.custom(
        lambda name: 'android' if 'android' in name.lower() else 'ios',
        'campaign_extended_name',
    ),
)

# Keep campaigns whose bad share exceeds the baseline of their platform.
anomalous_campaigns = with_platform.join(porogs, by='platform', type='left').filter(
    nf.custom(lambda share, limit: share > limit, 'bad_percent', 'porog_percent'),
)

anomalous_campaigns.sort('bad_percent').put(
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_screen_size'
)

# Nothing above is executed until the job is run.
job.run()
