# -*- coding: UTF-8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile

#from nile.api.alpha import clusters

from datetime import datetime, timedelta, date
import uatraits
import urllib, re, random, urlparse

import pandas as pd

# Log dates to process, formatted as YYYY-MM-DD strings (both ends inclusive).
_log_days = pd.date_range('2016-10-07', '2016-10-11')
dates = [i.strftime('%Y-%m-%d') for i in _log_days]

# Hahn YT cluster handle; '$job_root' in table paths expands to the
# template registered here.
_path_templates = dict(
    job_root='home/search-research/ensuetina/GET_DISTRIB_UIDS')
_hahn = clusters.yt.Hahn(pool='search-research_ensuetina')
cluster = _hahn.env(templates=_path_templates)

#cluster = clusters.yql.Hahn()

def get_cookie_age(cookie, ts):
    """Classify a cookie as 'new' or 'old' relative to an event timestamp.

    The last 10 characters of ``cookie`` are parsed as an integer
    (presumably the Unix time at which the cookie was issued -- confirm
    against the yandexuid format) and compared with ``ts``.

    Args:
        cookie: cookie value; its last 10 characters must be digits.
        ts: event timestamp, anything accepted by ``int()``.

    Returns:
        'new' if ``ts`` is less than 1800 seconds after the value parsed
        from the cookie, 'old' otherwise, or None when either input is
        missing or cannot be parsed as an integer.
    """
    try:
        uid_ts = int(cookie[-10:])
        ts = int(ts)
    except (TypeError, ValueError):
        # TypeError: None / non-sliceable input; ValueError: non-numeric
        # text. Anything else (e.g. KeyboardInterrupt) must propagate.
        return None

    # 1800 seconds == 30 minutes.
    if ts - uid_ts < 1800:
        return 'new'
    return 'old'

def get_eventtype(vv):
    """Return the 'eventtype' entry of the mapping ``vv``, or None if absent."""
    event_type = vv.get('eventtype')
    return event_type

def get_os(vv):
    """Return characters 6-7 (0-based) of the stringified 'showid' entry.

    Judging by the function name, these two characters are presumably an
    OS code embedded in the show id -- confirm against the showid format.

    Args:
        vv: mapping of parsed variables; only 'showid' is read.

    Returns:
        A string of at most two characters. It is empty when the show id
        is shorter than 7 characters or missing (a missing value is
        stringified to 'None', which is only 4 characters long).
    """
    show_id = str(vv.get('showid'))
    # Slicing a str never raises, so no exception handling is needed here.
    return show_id[6:8]

# For every day in `dates`, pull matching redir-log records and append them
# to '$job_root/distrib_shows_new'. The loop variable is named `day` so it
# does not rebind the `date` class imported from datetime at the top of the
# file.
for day in dates:
    # One job per day, run in the 'yamr_lines' default mode.
    job = cluster.job().env(default_mode='yamr_lines')

    redir_log = job.table('statbox/redir-log.6p/' + day)

    shows = redir_log.qb2(
        log='redir-log',
        fields=[
            'normal_path', 'normal_vars', 'yandexuid', 'date', 'timestamp',
            # Derived columns computed by the helper functions above.
            se.custom('cookie_age', get_cookie_age, 'yandexuid', 'timestamp'),
            se.custom('os', get_os, 'normal_vars'),
            se.custom('event', get_eventtype, 'normal_vars'),
        ],
        filters=[
            sf.default_filtering('redir-log'),
            sf.defined('normal_path'),
            # Keep only 'show' events on tech.portal-ads paths with os '30'.
            sf.contains('normal_path', 'tech.portal-ads'),
            sf.equals('event', 'show'),
            sf.equals('os', '30'),
        ],
    ).put('$job_root/distrib_shows_new', append=True)

    # NOTE: the output is opened with append=True, so re-running the script
    # for the same dates adds the same rows again.
    job.run()

# Second stage: combine the two show tables and build per-date aggregates.
job = cluster.job()

t1 = job.table('$job_root/distrib_shows')
t2 = job.table('$job_root/distrib_shows_new')

t = job.concat(t1, t2)

# Per (date, yandexuid): number of shows plus the min and max of
# cookie_age. With the 'new'/'old' labels, the string minimum is 'new' if
# the uid had any 'new' show and the maximum is 'old' if it had any 'old'
# one (how the aggregators treat None values is not verified here).
shows_per_uid = t.groupby('date', 'yandexuid').aggregate(
    shows=na.count(),
    isnew=na.min('cookie_age'),
    isold=na.max('cookie_age'),
)

# Roll up to (date, isnew, isold): total shows and number of uids.
tot = shows_per_uid.groupby('date', 'isnew', 'isold').aggregate(
    shows=na.sum('shows'),
    uids=na.count(),
).put('$job_root/aggr_all_shows')

# Earliest date seen for each yandexuid. It is stored twice: as 'date', so
# it can serve as a join key below, and as 'min_date'.
first_shows = t.groupby('yandexuid').aggregate(
    date=na.min('date'),
    min_date=na.min('date'),
).put('$job_root/first_shows')

# Keep only the shows that happened on each uid's earliest date.
first = t.join(
    first_shows,
    by=('yandexuid', 'date'),
    type='inner',
).put('$job_root/joined_reqs')

#t = job.table('home/search-research/ensuetina/GET_DISTRIB_UIDS/joined_reqs')

# Same two-level aggregation as above, restricted to first-date shows.
first_per_uid = first.groupby('date', 'yandexuid').aggregate(
    first_shows=na.count(),
    fnew=na.min('cookie_age'),
    fold=na.max('cookie_age'),
)

# Written through the '$job_root' template like every other output of this
# job; with the template set on `cluster` this is the same table as the
# previously hard-coded absolute path.
first_per_uid.groupby('date', 'fnew', 'fold').aggregate(
    fshows=na.sum('first_shows'),
    fuids=na.count(),
).put('$job_root/aggr_new')

job.run()



