#-*-coding: utf8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile
import libra
from datetime import datetime
import uatraits
import urllib, re, random, urlparse
import pandas as pd


username = 'ensuetina'
date_format = '%Y-%m-%d'

# Pick the cluster. The `job_root` template defined here is what the
# '$job_root/...' table paths used further down in this file refer to.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={
        'job_root': 'home/search-research/' + username + '/DISTRIBUTION_METRIC/MORDA_SEARCH',
    }
)

# One 'YYYY-MM-DD' string per calendar day of the processed period.
dates = [day.strftime(date_format) for day in pd.date_range('2016-07-11', '2016-08-13')]

class get_percentilles:
    """Reducer summarising per-user coverage for every platform group.

    For each group (keyed by ``plat``) the reducer walks the records in the
    order they arrive and accumulates ``rec.coverage``.  In this file it is
    applied after ``groupby('plat').sort('neg_cov')``.

    For every threshold ``qN = int(lines * N / 100)`` (N = 10, 50, 90) it
    reports:

    * ``sumN``       -- sum of ``coverage`` over the first ``qN`` records;
    * ``quantilleN`` -- that sum divided by ``qN``, rounded to 2 decimals.

    ``mean`` is the total coverage of the group divided by ``lines``.

    Parameters
    ----------
    dlines : int
        Number of records expected for the ``'desktop'`` platform.
    tlines : int
        Number of records expected for every other platform.

    Yields
    ------
    One ``Record`` per group.  ``quantilleN`` is ``None`` (and ``sumN`` is 0)
    when the threshold is 0 or the group holds fewer than ``qN`` records;
    ``mean`` is ``None`` when ``lines`` is 0.
    """

    def __init__(self, dlines, tlines):
        self.dlines = dlines
        self.tlines = tlines

    def __call__(self, groups):
        for key, records in groups:
            plat = key.plat
            lines = self.dlines if plat == 'desktop' else self.tlines

            # Number of leading records covered by each threshold.
            q10 = int(lines * 0.1)
            q50 = int(lines * 0.5)
            q90 = int(lines * 0.9)

            # Reset everything per group so that a threshold that is never
            # reached cannot leak a value from a previously processed group
            # (or raise NameError on the first group).
            sum10 = sum50 = sum90 = 0
            quantille10 = quantille50 = quantille90 = None

            sum_data = 0
            seen = 0
            for rec in records:
                sum_data += rec.coverage
                seen += 1

                # Independent checks (not elif): for small groups several
                # thresholds may coincide and all of them must be filled.
                # Checking after the increment means a threshold of 0 is
                # never matched, which avoids a division by zero.
                if seen == q10:
                    sum10 = sum_data
                    quantille10 = round(float(sum_data) / float(q10), 2)
                if seen == q50:
                    sum50 = sum_data
                    quantille50 = round(float(sum_data) / float(q50), 2)
                if seen == q90:
                    sum90 = sum_data
                    quantille90 = round(float(sum_data) / float(q90), 2)

            # `lines` is supplied by the caller and may be 0.
            mean = round(float(sum_data) / float(lines), 2) if lines else None

            yield Record(plat=plat, lines=lines, q10=q10, q50=q50, q90=q90,
                         quantille10=quantille10, quantille50=quantille50,
                         quantille90=quantille90, mean=mean, sum10=sum10,
                         sum50=sum50, sum90=sum90, sum_data=sum_data
                        )


def map_us(lines):
    """Yield a Record for every TECH event whose path mentions 'tech.portal-ads'.

    Each input row must expose ``key`` (emitted as ``uid``) and ``value``, a
    tab-separated string of ``name=value`` tokens.  Tokens without '=' are
    ignored; a token is split on its first '=' only.

    Yields ``Record(uid, service, ui, path, vv, value)`` where ``vv`` is the
    ``vars`` token and ``value`` is the untouched raw row value.  Tokens that
    are absent from the row are emitted as the string ``'None'`` (result of
    ``str(None)``).
    """
    for row in lines:
        uid = row.key
        raw = row.value

        data = dict(token.split('=', 1) for token in raw.split('\t') if '=' in token)

        # Filter first so that the remaining fields are only extracted for
        # rows that are actually emitted.
        path = str(data.get('path'))
        if str(data.get('type')) != 'TECH' or 'tech.portal-ads' not in path:
            continue

        yield Record(uid=uid,
                     service=str(data.get('service')),
                     ui=str(data.get('ui')),
                     path=path,
                     vv=str(data.get('vars')),
                     value=raw)

def get_service(x):
    """Map a projects value to a short service name.

    ``x`` is tested with ``in`` against known project markers; in this file
    it is the ``prj`` field, i.e. ``str()`` of the ``projects`` field.  The
    first service (in the order listed below) with at least one marker
    contained in ``x`` is returned; ``'search'`` is the fallback.
    """
    # Ordered by priority: earlier entries win when several markers match.
    service_markers = (
        ('morda', ('Morda_RU', 'Morda_Mobile_All')),
        ('maps', ('Maps.Normal', 'Maps_Mobile_All')),
        ('mail', ('Mail_Yandex_RU', 'Mail_Mobile_All')),
        ('images', ('Image_RU',)),
        ('news', ('News_RU', 'News_Mobile_All')),
        ('weather', ('Weather', 'Weather_Mobile_All')),
        ('translate', ('Translate',)),
    )
    for service, markers in service_markers:
        if any(marker in x for marker in markers):
            return service
    return 'search'

# For every date: (1) build per-user hit counts, banner-show counts and their
# ratio ("coverage"), (2) read back how many joined users each platform has,
# (3) append percentile summaries for the date to the report tables.
for date in dates:
    # ---- Job 1: hits, shows, coverage and per-platform row counts ----------
    job = cluster.job()

    access = job.table('statbox/access-log/' + date)
    banners = job.table('home/personalization/v4_daily/' + date + '/atom_banners')
    # NOTE(review): `us` is not used anywhere below -- confirm whether the
    # table declaration is still needed and remove it if not.
    us = job.table('user_sessions/pub/search_daily/' + date + '/clean')

    # Access-log hits per (yandexuid, domain): only the listed projects,
    # only requests where parameter `p` is absent or equals '0', only
    # domain 'ru'.
    hits = access.qb2(
        log='access-log',
        fields=[
            'yandexuid', 'canonized_vhost', 'domain', 'page', 'date',
            se.log_field('raw_yandexuid').hide().allow_override(),
            se.custom('prj', str, 'projects'),
            se.custom('service', get_service, 'prj'),
            se.parameter('p')
        ],
        filters=[
            sf.default_filtering('access-log'),
            sf.projects(['Morda_RU', 'Morda_Mobile_All',
                         'Yandex_RU', 'Search_Mobile_All',
                         # Projects currently switched off:
                         #'Video', 'm.video.yandex.ru',
                         #'Image_RU',# 'Image_Mobile_All',
                         #'Maps.Normal', 'Maps_Mobile_All',
                         #'News_RU', 'News_Mobile_All',
                         #'Weather', 'Weather_Mobile_All',
                         #'Mail_Yandex_RU', 'Mail_Mobile_All',
                         #'Translate'
                        ]),
            sf.or_(sf.not_(sf.defined('p')),
                   sf.equals('p', '0')
                  ),
            sf.equals('domain', 'ru')
        ]
    ).groupby(
        'yandexuid',
        'domain'
    ).aggregate(hits=na.count()).put('$job_root/aggr_hits')

    # Banner "show" events with a known yandexuid, excluding the listed
    # distr_obj kinds, restricted to the listed referers and country 'ru'.
    # `plat` is 'desktop' for referers 'yandex.ru' / 'yandex.ru/search' and
    # 'touch' for everything else that passed the referer filter.
    distrib_all = banners.filter(
        sf.defined('yandexuid'),
        sf.equals('eventtype', 'show'),
        sf.not_(sf.or_(sf.contains('distr_obj', 'footer'),
                       sf.contains('distr_obj', 'teaser'),
                       sf.contains('distr_obj', 'softlink'),
                       sf.contains('distr_obj', 'soft_link'),
                       sf.contains('distr_obj', 'bannermedia')
                      )
               ),
        sf.one_of('referer', {'yandex.ru',
                              'yandex.ru/search',
                              'yandex.ru/touchsearch',
                              # Referers currently switched off:
                              #'yandex.ru/maps',
                              #'yandex.ru/images',
                              #'news.yandex.ru',
                              #'pogoda.yandex.ru'
                             })
    ).filter(
        nf.equals('country', 'ru')
    ).project(
        ne.all(),
        plat=ne.custom(lambda x: 'desktop' if x == 'yandex.ru/search' or x == 'yandex.ru' else 'touch', 'referer')
    ).put('$job_root/all_distrib_shows')

    # Number of shows per (yandexuid, country, plat).
    distrib = distrib_all.groupby('yandexuid', 'country', 'plat').aggregate(shows=na.count()).put('$job_root/uids_shows')

    # Keep only users present in both streams.
    j1 = distrib.join(hits, by='yandexuid', type='inner')#.put('$job_root/joined_uids')

    # coverage = shows / hits, rounded to 2 decimals; neg_cov is its negation
    # and is used as the sort key (presumably so that the highest coverage
    # comes first -- confirm the sort direction).
    joined_uids = j1.project(
        ne.all(),
        coverage=ne.custom(lambda x, y: round(float(x) / float(y), 2), 'shows', 'hits'),
        neg_cov=ne.custom(lambda x, y: -1 * round(float(x) / float(y), 2), 'shows', 'hits')
    ).sort('neg_cov').put('$job_root/joined_coverage')

    # Row count of the joined table per platform.
    aggr = joined_uids.groupby('plat').aggregate(lines=na.count()).put('$job_root/lines')

    job.run()

    # ---- Read back the constants: number of joined uids per platform -------
    # Reset the counters for every date: without this a platform missing from
    # the table would either raise NameError (first date) or silently reuse
    # the count computed for the previous date.
    dlines = 0
    tlines = 0

    records = cluster.read('$job_root/lines')

    for rec in records:
        if rec.plat == 'desktop':
            dlines = rec.lines
        else:
            tlines = rec.lines

    # ---- Job 2: percentile reports, appended per date ----------------------
    job = cluster.job()

    joined_uids = job.table('$job_root/joined_coverage')

    result = joined_uids.groupby('plat').sort('neg_cov').reduce(
        get_percentilles(dlines, tlines)
    ).project(
        ne.all(),
        date=ne.const(date)
    ).put('$job_root/report_full', append=True)

    # Same report restricted to users with coverage <= 1.0.
    # NOTE(review): the reducer still receives the unfiltered row counts
    # (dlines/tlines come from '$job_root/lines'), while this stream may hold
    # fewer rows -- confirm that thresholds and mean based on the unfiltered
    # counts are intended here.
    result_filtered = joined_uids.filter(
        nf.custom(lambda x: float(x) <= 1.0, 'coverage')
    ).groupby('plat').sort('neg_cov').reduce(
        get_percentilles(dlines, tlines)
    ).project(
        ne.all(),
        date=ne.const(date)
    ).put('$job_root/report_filtered', append=True)


    job.run()

