from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import requests
import numpy as np
import datetime
import re
import json
# Reporting window as 'YYYY-MM-DD' strings: yesterday, and 90 days back.
enddate = (datetime.datetime.now() - datetime.timedelta(days=1)).date().isoformat()
startdate = (datetime.datetime.now() - datetime.timedelta(days=90)).date().isoformat()

# Template values substituted into table paths ('$job_root', '@dates').
# Only `enddate` goes into the `dates` template here.
_job_templates = {
    'job_root': 'home/turkey-analytics/ktereshin',
    'dates': '{%s}' % enddate,
}

# NOTE(review): the access token is committed in source as a literal;
# consider loading it from a secret store or the environment instead.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(templates=_job_templates)
def get_token(x):
    """Extract the ``token`` field from a JSON-encoded value.

    Args:
        x: JSON text expected to decode to an object. Any other input
            (``None``, malformed JSON, a JSON list/scalar) is tolerated.

    Returns:
        The value stored under the ``'token'`` key, or ``None`` when ``x``
        cannot be parsed, does not decode to an object, or has no such key.
    """
    try:
        payload = json.loads(x)
    except (TypeError, ValueError):
        # TypeError: x is not text (e.g. None); ValueError: malformed JSON.
        return None
    if not isinstance(payload, dict):
        # Valid JSON, but a list/scalar cannot carry a 'token' key.
        return None
    return payload.get('token')
def get_token_with_max_date(groups):
    """Yield, for each group, the token of its latest event.

    Records whose token is ``None`` or empty are ignored. The original
    condition ``!= None or != ''`` was always true, so blank tokens could
    overwrite a real one and token-less groups still produced output.

    Args:
        groups: iterable of ``(key, records)`` pairs. ``key`` exposes
            ``DeviceID``; each record exposes ``token`` and
            ``EventDateTime``.

    Yields:
        ``Record(DeviceID, token, EventDateTime)`` for every group that has
        at least one usable token dated on or after ``'2000-01-01'``;
        other groups yield nothing.
    """
    for key, records in groups:
        # Floor value: events dated before it are never selected.
        max_date = '2000-01-01'
        token = None
        for record in records:
            candidate = record.token
            if candidate is None or candidate == '':
                continue
            # Plain `>=` comparison; assumes EventDateTime values sort
            # chronologically (e.g. ISO-like strings) -- TODO confirm.
            # On ties the later record in iteration order wins.
            if record.EventDateTime >= max_date:
                max_date = record.EventDateTime
                token = candidate
        if token is not None:
            yield Record(DeviceID=key.DeviceID, token=token, EventDateTime=max_date)
# Job 1: collect push tokens from the mobile log for the '@dates' template
# and append the latest token per DeviceID to the dictionary table.
job = cluster.job()
appsfluer = job.table('statbox/metrika-mobile-log/@dates') \
    .filter(
        # Keep only the listed API keys.
        nf.custom(lambda x: x in ['10321', '106400', '19531', '19534', '42989'], 'APIKey'),
        # Keep events whose name mentions "push" (case-insensitive).
        nf.custom(lambda x: 'push' in str(x).lower(), 'EventName'),
        nf.custom(lambda x: x != None and x != '', 'DeviceID')
    ) \
    .project(
        'DeviceID',
        'EventDateTime',
        token = ne.custom(get_token, 'EventValue')
    ) \
    .filter(
        # Drop rows without a usable token. This must be a conjunction:
        # `x != None or x != ''` is true for every value, so it filtered nothing.
        nf.custom(lambda x: x != None and x != '', 'token')
    ) \
    .groupby(
        'DeviceID',
        'token',
        'EventDateTime'
    ) \
    .aggregate(
        # One row per distinct (DeviceID, token, EventDateTime).
        count = na.count()
    ) \
    .groupby(
        'DeviceID'
    ) \
    .reduce(get_token_with_max_date) \
    .put('$job_root/antifraud/did_puth_token_dict', append=True)
job.run()
# Job 2: re-read the dictionary table, reduce it to one record per DeviceID
# with get_token_with_max_date, and write the result back to the same path.
job = cluster.job()
appsfluer = (
    job.table('$job_root/antifraud/did_puth_token_dict')
    .groupby('DeviceID')
    .reduce(get_token_with_max_date)
    .put('$job_root/antifraud/did_puth_token_dict')
)
job.run()
