from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)

from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import numpy as np
# Date window as 'YYYY-MM-DD' strings: startdate is two days before now,
# enddate is the day after startdate.
startdate = datetime.datetime.now() - datetime.timedelta(days=2)
enddate = (startdate + datetime.timedelta(days=1)).date().isoformat()
startdate = startdate.date().isoformat()
# Hahn cluster handle carrying the path/date templates used by the job below.
# NOTE(review): the access token is committed in plain text; consider rotating
# it and loading it from the environment or a secret store instead.
cluster = clusters.Hahn(
    pool='mobile-research',
    token='a6575e1e15b0475fb8d8564beaa60f23',
).env(
    templates={
        'job_root': 'home/turkey-analytics/ktereshin',
        'dates': '{2016-07-19}',
    },
)
import urllib2
import datetime
import re
# Date of the first dictionary snapshot to request, plus its 'YYYY-MM-DD' form.
currentdate = datetime.datetime.now()
currentdatestr = currentdate.date().isoformat()
import operator
import re
from functools import partial
# Parse the clid dictionary from the statbox distr_report dump.
# Channel codes that map to a named channel type; every other code is 'Other'.
_CHANNEL_TYPE_BY_CODE = {
    '101': 'Pre-Installs',
    '206': 'Pre-Installs',
    '105': 'Store-Installs',
    '147': 'Store-Installs',
}
# Upper bound on how many daily snapshots are tried before giving up, so a
# persistent failure (network, auth) cannot retry indefinitely.
_MAX_CLID_DICT_ATTEMPTS = 366


def _parse_clid_dict(raw_text):
    """Build the clid lookup from a tab-separated distr_report dump.

    Every non-empty line contributes one entry keyed by column 1, holding
    'channel_type' (derived from the channel code in column 5),
    'media_source_type' (column 8, or 'Custom partner builds' for code 147),
    'campaign' (column 3) and 'channel_code' (column 5).

    Raises IndexError when a non-empty line has too few columns.
    """
    parsed = {}
    for line in raw_text.split('\n'):
        if not line:
            # A trailing newline produces an empty item that holds no record.
            continue
        par = line.split('\t')
        code = par[5]
        parsed[par[1]] = {
            'channel_type': _CHANNEL_TYPE_BY_CODE.get(code, 'Other'),
            'media_source_type': 'Custom partner builds' if code == '147' else par[8],
            'campaign': par[3],
            'channel_code': code,
        }
    return parsed


# Download the newest available dictionary snapshot: start with currentdatestr
# and step back one day after every failed download or parse.
for _ in range(_MAX_CLID_DICT_ATTEMPTS):
    try:
        url_dict = ('https://hahn.yt.yandex-team.ru/api/v2/download?path=//statbox/statbox-dict/'
                    + currentdatestr + '/distr_report&disposition=attachment')
        dict_all_to_parse = urllib2.urlopen(urllib2.Request(url_dict))
        try:
            dict_all = dict_all_to_parse.read()
        finally:
            # Release the connection even when reading fails.
            dict_all_to_parse.close()
        clid_dict = _parse_clid_dict(dict_all)
        if not clid_dict:
            # An empty snapshot is as unusable as a missing one; try an older day.
            raise ValueError('distr_report for %s contains no records' % currentdatestr)
        break
    except Exception:
        # Exception (not a bare except) keeps Ctrl-C and SystemExit working.
        print('attempt')
        currentdate = currentdate - datetime.timedelta(days=1)
        currentdatestr = str(currentdate).split(' ')[0]
else:
    raise RuntimeError(
        'no usable distr_report dictionary found in the last %d daily snapshots'
        % _MAX_CLID_DICT_ATTEMPTS
    )
def dict_clid(x, cdict):
    """Classify clid *x* by the channel code stored for it in *cdict*.

    Returns 'Pre_Installs' for codes 101/206, 'Store_Installs' for codes
    105/147 and 'Other' for any other code, for a clid missing from *cdict*,
    or for an entry without a 'channel_code'.
    """
    try:
        code = cdict[x]['channel_code']
    except (LookupError, TypeError):
        # Unknown clid, entry without a code, or an unusable key/mapping.
        return 'Other'
    if code in ('101', '206'):
        return 'Pre_Installs'
    if code in ('105', '147'):
        return 'Store_Installs'
    return 'Other'
def channel(x, cdict):
    """Return the 'media_source_type' stored for clid *x* in *cdict*.

    Falls back to 'Other' when the clid is missing from *cdict* or its entry
    has no 'media_source_type'.
    """
    try:
        return cdict[x]['media_source_type']
    except (LookupError, TypeError):
        # Unknown clid, incomplete entry, or an unusable key/mapping.
        return 'Other'
# Single-argument versions of the lookups above with the downloaded clid
# dictionary bound as cdict.
pdict_clid = partial(dict_clid, cdict=clid_dict)
pchannel = partial(channel, cdict=clid_dict)
def week_start(x):
    """Return the Monday of the week containing date *x* as 'YYYY-MM-DD'.

    *x* is converted with str() and parsed as '%Y-%m-%d'. Returns None when
    it cannot be parsed (for example when *x* is None).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (ValueError, TypeError):
        return None
    # weekday() is 0 for Monday, so subtracting it lands on the week's Monday.
    monday = day - datetime.timedelta(days=day.weekday())
    return monday.date().isoformat()
def month_start(x):
    """Return the first day of the month containing date *x* as 'YYYY-MM-DD'.

    *x* is converted with str() and parsed as '%Y-%m-%d'. Returns None when
    it cannot be parsed (for example when *x* is None).
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
    except (ValueError, TypeError):
        return None
    return day.replace(day=1).date().isoformat()
def install_day(x, y):
    """Return 'New' when str(x) equals str(y), otherwise 'Old'."""
    return 'New' if str(x) == str(y) else 'Old'
def install_month(x, y):
    """Return 'New' when str(x) equals str(y), otherwise 'Old'."""
    if str(x) != str(y):
        return 'Old'
    return 'New'
def install_week(x, y):
    """Return 'New' when str(x) equals str(y), otherwise 'Old'."""
    same = str(x) == str(y)
    # Index the pair by the comparison result: False -> 'Old', True -> 'New'.
    return ('Old', 'New')[same]
def d_delta(x, y):
    """Return the number of whole days from date *y* to date *x*.

    Both arguments are converted with str() and parsed as '%Y-%m-%d'. The
    result is negative when *x* precedes *y*. Returns None when either value
    cannot be parsed (for example when it is None).
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        return None
    return (later - earlier).days
def w_delta(x, y):
    """Return the number of whole weeks from date *y* to date *x*.

    Both arguments are converted with str() and parsed as '%Y-%m-%d'. The day
    difference is floor-divided by 7, so a negative partial week rounds toward
    negative infinity. Returns None when either value cannot be parsed.
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        return None
    # Explicit floor division keeps the result independent of whether `/`
    # means integer or true division in the running interpreter.
    return (later - earlier).days // 7
def m_delta(x, y):
    """Return the number of calendar months from date *y* to date *x*.

    Both arguments are converted with str() and parsed as '%Y-%m-%d'. Only the
    year and month take part in the difference; the day of month is ignored.
    Returns None when either value cannot be parsed.
    """
    fmt = '%Y-%m-%d'
    try:
        later = datetime.datetime.strptime(str(x), fmt)
        earlier = datetime.datetime.strptime(str(y), fmt)
    except (ValueError, TypeError):
        return None
    return (later.year - earlier.year) * 12 + later.month - earlier.month
def country(x):
    """Map the region id string *x* to a two-letter code, or 'Other'.

    Recognised ids: '225' -> 'RU', '187' -> 'UA', '159' -> 'KZ',
    '149' -> 'BY', '983' -> 'TR'.
    """
    known_regions = (
        ('225', 'RU'),
        ('187', 'UA'),
        ('159', 'KZ'),
        ('149', 'BY'),
        ('983', 'TR'),
    )
    for region_id, code in known_regions:
        if x == region_id:
            return code
    return 'Other'
def dd(x):
    """Return *x* when its type is exactly int or float, otherwise 0.

    The check is on the exact type, so None, strings, bools and any other
    type all yield 0.
    """
    return x if type(x) in (int, float) else 0
# Join per-device searches and revenue onto installs, derive week/month and
# new-vs-old attributes, write the result table and run the job.
job = cluster.job()

# Searches and revenue summed per (date, browser, device_id, channel_type);
# rows whose device_id is None or empty are dropped first.
search = (
    job.table('$job_root/searches_money/searches_money')
    .filter(nf.custom(lambda x: x is not None and x != '', 'device_id'))
    .groupby('date', 'browser', 'device_id', 'channel_type')
    .aggregate(searches=na.sum('searches'), revenue=na.sum('revenue'))
)

# One row per distinct install attribute combination, with the install-time
# channel_type kept as install_channel_type and a constant users=1, then
# left-joined with the searches by device_id.
installs = (
    job.table('$job_root/installs/installs_uuid')
    .filter(nf.custom(lambda x: x is not None and x != '', 'device_id'))
    .groupby(
        'device_id', 'project', 'fielddate', 'media_source_type', 'media_source',
        'campaign', 'channel_type', 'platform', 'country',
    )
    .aggregate(some=na.count())
    .project(
        'device_id', 'platform', 'project', 'fielddate', 'media_source_type',
        'media_source', 'campaign', 'country',
        install_channel_type='channel_type',
        users=ne.const(1),
    )
    .join(search, by='device_id', type='left')
)

# Week/month buckets for the search date and the install date; searches and
# revenue pass through dd().
installs = installs.project(
    ne.all(),
    week=ne.custom(week_start, 'date'),
    install_week=ne.custom(week_start, 'fielddate'),
    month=ne.custom(month_start, 'date'),
    install_month=ne.custom(month_start, 'fielddate'),
    searches=ne.custom(dd, 'searches'),
    revenue=ne.custom(dd, 'revenue'),
)

# New/Old flags and distances between the search period and the install period.
installs = installs.project(
    ne.all(),
    new_day=ne.custom(install_day, 'date', 'fielddate'),
    new_week=ne.custom(install_day, 'week', 'install_week'),
    new_month=ne.custom(install_day, 'month', 'install_month'),
    days_delta=ne.custom(d_delta, 'date', 'fielddate'),
    weeks_delta=ne.custom(w_delta, 'week', 'install_week'),
    months_delta=ne.custom(m_delta, 'month', 'install_month'),
)

installs = installs.put('$job_root/searches_money/install_searches_money_new_old')
job.run()
