from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import numpy as np
import random
import re
# Date of the log partition to process: yesterday (local clock), as YYYY-MM-DD.
startdate = (datetime.datetime.now() - datetime.timedelta(days=1)).date().isoformat()

# NOTE(review): the access token is hard-coded in source; consider loading it
# from the environment or a secrets store and rotating this value.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(
    templates=dict(
        # Root directory for every table this script writes ($job_root).
        job_root='home/turkey-analytics/ktereshin',
        # Substituted for @dates in input table paths; the date is wrapped in braces.
        dates='{%s}' % startdate,
    )
)

def get_hw_id(x):
    """Return ``str(x)`` with all quotes and square brackets removed.

    The value is first converted with ``str``; then every double quote,
    single quote, ``[`` and ``]`` character is deleted. All other
    characters (including commas and spaces) are kept as they are.
    """
    text = str(x)
    for unwanted in ('"', '[', ']', "'"):
        text = text.replace(unwanted, '')
    return text
# Job 1: pull (device_id, hw_id) pairs out of the mobile metrika log for the
# dates configured in the @dates template and append them to the accumulated
# table $job_root/hw_id/dict_hw_id_unique.
job = cluster.job()
log_pairs = job.table('statbox/metrika-mobile-log/@dates').filter(
    # Only events reported with this API key.
    nf.custom(lambda x: x == '10321', 'APIKey'),
    # The environment values must be present and non-empty.
    nf.custom(lambda x: x is not None and x != '', 'ReportEnvironment_Values'),
    # The environment keys must mention 'pw_hwid' (substring match on str()).
    nf.custom(lambda x: 'pw_hwid' in str(x), 'ReportEnvironment_Keys'),
    # The device id must be present and non-empty.
    nf.custom(lambda x: x is not None and x != '', 'DeviceID'),
).project(
    device_id='DeviceID',
    hw_id='ReportEnvironment_Values',
)
# Collapse repeated (device_id, hw_id) rows; the count column is only a
# by-product of the aggregation and is dropped by the projection.
# NOTE(review): rows are deduplicated on the raw hw_id, and only afterwards is
# hw_id normalised with get_hw_id (quotes and square brackets removed).
deduplicated = log_pairs.groupby('device_id', 'hw_id').aggregate(some=na.count())
normalised = deduplicated.project(
    'device_id',
    hw_id=ne.custom(get_hw_id, 'hw_id'),
)
# append=True: add to the existing table instead of replacing it.
metrika = normalised.put('$job_root/hw_id/dict_hw_id_unique', append=True)
job.run()
# Job 2: deduplicate the accumulated pairs table. It is read, grouped by
# (device_id, hw_id) and written back to the same path, this time without
# append=True.
job = cluster.job()
accumulated_pairs = job.table('$job_root/hw_id/dict_hw_id_unique')
unique_pairs = (
    accumulated_pairs
    .groupby('device_id', 'hw_id')
    .aggregate(some=na.count())
    # Keep only the key columns; the count itself is not needed.
    .project('device_id', 'hw_id')
)
metrika = unique_pairs.put('$job_root/hw_id/dict_hw_id_unique')
job.run()
# Job 3: build the final dictionary. A pair is kept only when its device_id
# occurs in exactly one row of the pairs table AND its hw_id occurs in exactly
# one row of it.
job = cluster.job()
all_pairs = job.table('$job_root/hw_id/dict_hw_id_unique')

# device_id values that occur in exactly one row.
did = (
    all_pairs
    .groupby('device_id')
    .aggregate(some=na.count())
    .filter(nf.custom(lambda x: x == 1, 'some'))
    .project('device_id')
)
# hw_id values that occur in exactly one row.
hw = (
    all_pairs
    .groupby('hw_id')
    .aggregate(some=na.count())
    .filter(nf.custom(lambda x: x == 1, 'some'))
    .project('hw_id')
)

# Inner-join against both key sets, then store the result.
hw_id = (
    all_pairs
    .join(did, by='device_id', type='inner')
    .join(hw, by='hw_id', type='inner')
    .put('$job_root/hw_id/dict')
)
job.run()
