#-*-coding: utf8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile
import libra
from datetime import datetime
import datetime
import uatraits
import urllib, re, random, urlparse

import pandas as pd

class get_percentilles:
    """Reducer that summarises the ``coverage`` column of every group.

    For each group (keyed by plat, service, distr_obj and testid) the reducer
    walks the rows in the order they arrive and emits a single ``Record``
    holding, for every decile N in 10..90:

    * ``qN``         - cut-off row count, ``int(lines * N/100)``;
    * ``sumN``       - sum of ``coverage`` over the first ``qN`` rows;
    * ``quantilleN`` - mean ``coverage`` over the first ``qN`` rows, rounded
                       to 2 digits (falls back to the group mean when the
                       group is too small for ``qN`` to be non-zero);

    plus ``lines``, ``sum_data`` (sum over all rows) and ``mean``.

    The values are only meaningful when rows arrive in a deliberate order;
    this class does not sort them itself.
    """

    # (label, fraction) pairs. The fractions are kept as plain float literals
    # so that int(lines * fraction) yields the same cut-offs as before.
    _DECILES = ((10, 0.1), (20, 0.2), (30, 0.3), (40, 0.4), (50, 0.5),
                (60, 0.6), (70, 0.7), (80, 0.8), (90, 0.9))

    def __init__(self, lines):
        """Remember the group-size lookup.

        lines -- mapping from ``'plat&service&distr_obj&testid'`` to the
                 number of rows in that group.
        """
        self.lines = lines

    def __call__(self, groups):
        """Yield one summary ``Record`` per group; groups of size 0 are skipped.

        Raises ``KeyError`` when a group key is missing from ``self.lines``.
        """
        for key, records in groups:
            plat = key.plat
            serv = key.service
            obj = key.distr_obj
            testid = key.testid

            lines = self.lines[plat + '&' + serv + '&' + obj + '&' + testid]
            if lines == 0:
                continue

            cutoffs = [int(lines * fraction) for _, fraction in self._DECILES]
            sums = [0] * len(cutoffs)
            # -1 marks "cut-off never reached".
            means = [-1] * len(cutoffs)

            seen = 0
            sum_data = 0
            for rec in records:
                sum_data += rec.coverage
                seen += 1
                # Compare against the number of rows already added to
                # sum_data, so the running sum holds exactly `cutoff` rows
                # when it is divided by `cutoff`. (Checking the zero-based
                # index instead would divide cutoff + 1 rows by cutoff.)
                for idx, cutoff in enumerate(cutoffs):
                    if cutoff != 0 and seen == cutoff:
                        sums[idx] = sum_data
                        means[idx] = round(float(sum_data) / float(cutoff), 2)

            mean = round(float(sum_data) / float(lines), 2)

            # Groups too small to have a non-empty decile report the mean.
            for idx, cutoff in enumerate(cutoffs):
                if cutoff == 0:
                    means[idx] = mean

            fields = dict(plat=plat, service=serv, distr_obj=obj, lines=lines,
                          testid=testid, mean=mean, sum_data=sum_data)
            for idx, (label, _) in enumerate(self._DECILES):
                fields['q%d' % label] = cutoffs[idx]
                fields['sum%d' % label] = sums[idx]
                fields['quantille%d' % label] = means[idx]

            yield Record(**fields)

# Counter ids:
# yandsearch or /search/ – 731962
# padsearch or /search/pad/ – 21689008
# touchsearch or /search/touch/ – 22555771

# Touch Morda – 23474449
# Morda KUBR – 722545

def get_service(counter):
    """Translate a counter id string into a service name.

    Returns 'search' for '731962' and '22555771', 'morda' for '23474449'
    and '722545', and '-' for anything else.
    """
    if counter in ('731962', '22555771'):
        return 'search'
    if counter in ('23474449', '722545'):
        return 'morda'
    return '-'

def get_plat(counter):
    """Translate a counter id string into a platform name.

    Returns 'desktop' for '731962' and '722545', 'touch' for '22555771'
    and '23474449', and '-' for anything else.
    """
    if counter in ('731962', '722545'):
        return 'desktop'
    if counter in ('22555771', '23474449'):
        return 'touch'
    return '-'

def reduce_shows(groups):
    """Count banner shows per user across every object/service/testid slice.

    ``groups`` yields ``(key, recs)`` pairs where ``key.yandexuid`` identifies
    the user. A row is counted only when all of the following hold:

    * it has no truthy ``yesterday`` attribute;
    * ``eventtype == 'show'`` and ``country == 'ru'``;
    * ``distr_obj`` contains none of 'footer', 'teaser', 'softlink',
      'soft_link', 'bannermedia';
    * ``referer`` is 'yandex.ru' (service 'morda') or 'yandex.ru/search' /
      'yandex.ru/touchsearch' (service 'search').

    ``testids`` is a comma-separated string; an empty string is counted as
    the single testid 'no testid'.

    For every user the reducer yields
    ``Record(yandexuid, distr_obj, service, testid, shows)`` for each concrete
    combination and for every combination in which any subset of the three
    dimensions is replaced by '_total_'. Slices with a concrete testid are
    incremented once per testid of a row; slices whose testid is '_total_'
    are incremented once per row.
    """
    total = '_total_'
    excluded_markers = ('footer', 'teaser', 'softlink', 'soft_link', 'bannermedia')

    def slice_keys(obj, service, testid):
        # The four object/service roll-ups for one testid value.
        return (
            (obj, service, testid),
            (total, service, testid),
            (obj, total, testid),
            (total, total, testid),
        )

    def bump(counter, slice_key):
        counter[slice_key] = counter.get(slice_key, 0) + 1

    for key, recs in groups:
        uid = key.yandexuid

        # Keys are tuples rather than '&'-joined strings, so a distr_obj or
        # testid that itself contains '&' is emitted intact.
        # Counters 0-3 are the per-testid families, 4-7 the testid-total
        # families; they stay separate so families are never merged.
        counters = [{} for _ in range(8)]

        for rec in recs:
            event = rec.eventtype
            obj = rec.distr_obj
            referer = rec.referer
            country = rec.country
            testids = rec.testids

            if getattr(rec, 'yesterday', None):
                continue

            if event != 'show' or country != 'ru':
                continue

            if any(marker in obj for marker in excluded_markers):
                continue

            if referer == 'yandex.ru':
                service = 'morda'
            elif referer in ('yandex.ru/search', 'yandex.ru/touchsearch'):
                service = 'search'
            else:
                continue

            if testids == '':
                testids = 'no testid'

            for testid in testids.split(','):
                for counter, slice_key in zip(counters[:4], slice_keys(obj, service, testid)):
                    bump(counter, slice_key)

            for counter, slice_key in zip(counters[4:], slice_keys(obj, service, total)):
                bump(counter, slice_key)

        for counter in counters:
            for slice_key in counter:
                obj, serv, testid = slice_key
                yield Record(yandexuid=uid, distr_obj=obj, service=serv,
                             testid=testid, shows=counter[slice_key])

def reduce_hits(groups):
    """Sum hits per user for each platform/service pair and its roll-ups.

    ``groups`` yields ``(key, recs)`` pairs where ``key.yandexuid`` is the
    user and each row carries ``service``, ``plat``, ``date`` and ``hits``.
    For every user the reducer yields
    ``Record(yandexuid, date, plat, service, hits)`` for each concrete
    (plat, service) pair, for each service with plat '_total_', for each
    plat with service '_total_', and for the overall '_total_'/'_total_'
    slice. ``date`` on every emitted record is the date of the last row
    read for that user.
    """
    total = '_total_'

    def accumulate(bucket, slice_key, amount):
        # First occurrence stores the value as-is; later ones add to it.
        if slice_key in bucket:
            bucket[slice_key] += amount
        else:
            bucket[slice_key] = amount

    for key, recs in groups:
        uid = key.yandexuid

        exact = {}
        any_plat = {}
        any_serv = {}
        grand = {}

        for rec in recs:
            service = rec.service
            plat = rec.plat
            last_date = rec.date
            hits = rec.hits

            accumulate(exact, plat + '&' + service, hits)
            accumulate(any_plat, total + '&' + service, hits)
            accumulate(any_serv, plat + '&' + total, hits)
            accumulate(grand, total + '&' + total, hits)

        # Emit families in a fixed order: exact pairs, platform totals,
        # service totals, then the grand total.
        for bucket in (exact, any_plat, any_serv, grand):
            for slice_key in bucket:
                parts = slice_key.split('&')
                yield Record(yandexuid=uid, date=last_date, plat=parts[0],
                             service=parts[1], hits=bucket[slice_key])



# Local paths to the libra shared library and the blockstat dictionary.
full_path_to_blockstat = '/home/ensuetina/blockstat.dict'
full_path_to_libra = '/home/ensuetina/libra.so'

date_format = '%Y-%m-%d'
username = 'ensuetina'

# Select the cluster; job_root is a path template referenced as $job_root
# by the table paths used further down.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={
        'job_root': 'home/search-research/' + username + '/DISTRIBUTION_METRIC/WEB'
    }
)

# Explicit date range alternative:
# dates = [i.strftime('%Y-%m-%d') for i in pd.date_range('2016-10-21', '2016-10-22')]

# By default the report is built for yesterday only.
date = (datetime.datetime.today() - datetime.timedelta(days=1)).strftime(date_format)
dates = [date]

# Build and publish the report once per date in `dates`.
for date in dates:
    print date

    # First job: per-user hits, per-user shows, their join and group sizes.
    job = cluster.job()

    # Inputs for this date: the watch-log table and the atom_banners table.
    wl = job.table('statbox/watch-log/' + date, ignore_missing = True)
    banners = job.table('home/personalization/v4_daily/'+ date + '/atom_banners', ignore_missing = True)


    ##########################
    ######### HITS ###########
    ##########################

    # Keep watch-log rows with a defined uid and one of the four known
    # counter ids, derive service/plat from the counter id, count rows per
    # (uid, date, plat, service), then let reduce_hits add the '_total_'
    # roll-ups per user.
    hits = wl.qb2(log = 'watch-log',
                  fields = ['uid','canonized_vhost','domain','page','date','counter_id',
                            se.custom('service',get_service,'counter_id'),
                            se.custom('plat',get_plat,'counter_id')
                           ],
                  filters = [sf.default_filtering('watch-log'),
                             sf.defined('uid'),
                             sf.one_of('counter_id',{'731962','22555771','23474449','722545'})
                             #sf.equals('domain','ru')
                            ]
                 ).groupby('uid',
                           'date',
                           'plat',
                           'service'
                          ).aggregate(hits=na.count()).project('date','plat','service','hits',
                                                               yandexuid='uid'
                                                              ).groupby('yandexuid').reduce(reduce_hits).put('$job_root/all_hits')



    ##########################
    ######## BANNERS #########
    ##########################

    # Per-user show counts by distr_obj/service/testid (see reduce_shows).
    distrib = banners.filter(sf.defined('yandexuid')).groupby('yandexuid').reduce(reduce_shows).put('$job_root/distrib_shows')


    ##########################
    ########## JOIN ##########
    ##########################

    # Inner join on (yandexuid, service): only users present in both the
    # shows and the hits tables remain.
    j1 = distrib.join(hits,by=('yandexuid','service'),type='inner')#.put('$job_root/joined_uids')

    # coverage = shows / hits rounded to 2 digits; neg_cov is its negation and
    # is used as the sort key. Rows with coverage above 1.0 are dropped.
    joined_uids = j1.project(ne.all(),
                             coverage = ne.custom(lambda x,y: round(float(x)/float(y),2),'shows','hits'),
                             neg_cov = ne.custom(lambda x,y: -1*round(float(x)/float(y),2),'shows','hits')
                            ).filter(nf.custom(lambda x: float(x)<=1.0, 'coverage')
                                    ).sort('neg_cov').put('$job_root/joined_coverage')

    # Group sizes per (plat, service, distr_obj, testid).
    # NOTE(review): `lines` and `uids` are both na.count(), so they are equal.
    aggr = joined_uids.groupby('plat','service','distr_obj','testid').aggregate(lines = na.count(),
                                                                       uids = na.count(),
                                                                       shows = na.sum('shows')
                                                                      ).put('$job_root/lines')

    job.run()

    # Read the group sizes back so they can be handed to get_percentilles.
    records = cluster.read('$job_root/lines')

    lines = {}

    for rec in records:
        plat = rec.plat
        serv = rec.service
        obj = rec.distr_obj
        testid = rec.testid

        # Same 'plat&service&distr_obj&testid' key format that
        # get_percentilles builds for its lookup.
        k = plat + '&' + serv + '&' + obj + '&' + testid

        lines[k] = rec.lines

    # Second job: decile summary per group and the final report table.
    job = cluster.job()

    joined_uids = job.table('$job_root/joined_coverage')

    # shows_per_user = shows / uids per group, rounded to 2 digits.
    spu = job.table('$job_root/lines').project('plat','service','distr_obj','testid',
                                               shows_per_user = ne.custom(lambda x,y: round(float(x)/float(y),2),'shows','uids')
                                              )

    # Rows are sorted by neg_cov within each group before get_percentilles
    # consumes them; the report date is attached to every summary row.
    result_filtered = joined_uids.groupby('plat',
                                          'service',
                                          'distr_obj',
                                          'testid'
                                         ).sort('neg_cov').reduce(get_percentilles(lines)).project(ne.all(),
                                                                                                   date=ne.const(date)
                                                                                                  ).put('$job_root/report_filtered')

    rep = result_filtered.join(spu,by=('plat','service','distr_obj','testid'),type='left')

    # After the left join, rows without shows_per_user get the constant -1.
    rep1 = rep.filter(sf.defined('shows_per_user'))
    rep2 = rep.filter(sf.not_(sf.defined('shows_per_user'))).project(ne.all(),
                                                                     shows_per_user = ne.const(-1)
                                                                    )

    # Rename columns to the report field names, replacing empty strings with
    # '-', then de-duplicate the rows.
    report = job.concat(rep1,rep2).project(service = ne.custom(lambda x: x if x != '' else '-','service'),
                                           mean = ne.custom(lambda x: x if x != '' else '-','mean'),
                                           shows_per_user = ne.custom(lambda x: x if x != '' else '-','shows_per_user'),
                                           fielddate = ne.custom(lambda x: x if x != '' else '-','date'),
                                           where = ne.const('web'),
                                           testid = ne.custom(lambda x: x if x != '' else '-','testid'),
                                           platform = ne.custom(lambda x: x if x != '' else '-','plat'),
                                           product = ne.custom(lambda x: x if x != '' else '-','distr_obj'),
                                           q10 = ne.custom(lambda x: x if x != '' else '-','quantille10'),
                                           q20 = ne.custom(lambda x: x if x != '' else '-','quantille20'),
                                           q30 = ne.custom(lambda x: x if x != '' else '-','quantille30'),
                                           q40 = ne.custom(lambda x: x if x != '' else '-','quantille40'),
                                           q50 = ne.custom(lambda x: x if x != '' else '-','quantille50'),
                                           q60 = ne.custom(lambda x: x if x != '' else '-','quantille60'),
                                           q70 = ne.custom(lambda x: x if x != '' else '-','quantille70'),
                                           q80 = ne.custom(lambda x: x if x != '' else '-','quantille80'),
                                           q90 = ne.custom(lambda x: x if x != '' else '-','quantille90')
                                          ).unique('fielddate','where','platform','service','product','testid',
                                                   'q10','q20','q30','q40','q50','q60','q70','q80',
                                                   'q90','mean','shows_per_user').put('$job_root/report')


    job.run()

    # Publish the report table to Statface.
    # NOTE(review): username and password are empty strings here - confirm
    # how credentials are expected to be supplied.
    client = ns.StatfaceClient(
        proxy = 'upload.stat.yandex-team.ru',
        username = '',
        password = ''
	)

    report = ns.StatfaceReport().path('Yandex_RU/Others/distribution_metric/report_v1').scale('daily')

    report = report.client(client)

    report = report.data(cluster.read('$job_root/report'))

    report.publish()

