#-*-coding: utf8 -*-

from nile.api.v1 import (
    filters as nf,
    aggregators as na,
    extractors as ne,
    statface as ns,
    clusters,
    Record
)

from qb2.api.v1 import extractors as se, filters as sf

import nile
import libra
from datetime import datetime
import uatraits
import urllib, re, random, urlparse

import pandas as pd

from Crypto.Cipher import Blowfish
import base64
import json


class get_percentilles:
    """Reducer that summarises ``coverage`` over each (plat, service, distr_obj) group.

    For every decile level N in 10..90 the reducer emits:

    * ``qN``         - the cut-off position ``int(lines * N/100)``;
    * ``sumN``       - the sum of ``coverage`` over the first ``qN`` records
                       of the group, in the order the records arrive;
    * ``quantilleN`` - ``sumN / qN`` rounded to 2 decimals, i.e. the average
                       coverage of the first ``qN`` records (despite the name
                       this is a running mean, not a true quantile).  When
                       ``qN`` is 0 the group mean is reported instead.

    ``mean`` is the total coverage divided by ``lines`` and ``sum_data`` is the
    total coverage of the group.

    Parameters
    ----------
    lines : mapping
        Maps the string ``plat + '&' + service + '&' + distr_obj`` to the
        number of records expected in that group.  Groups whose count is 0
        are skipped; a missing key raises ``KeyError``.

    Notes
    -----
    The record order is whatever the caller arranged before the reduce step.
    A ``quantilleN`` stays at ``-1`` only if the group delivers fewer than
    ``qN + 1`` records, i.e. when ``lines`` overstates the group size.
    """

    # (level, fraction) pairs.  The fractions are kept as float literals so
    # that int(lines * fraction) truncates exactly as it always has.
    _FRACTIONS = (
        (10, 0.1), (20, 0.2), (30, 0.3), (40, 0.4), (50, 0.5),
        (60, 0.6), (70, 0.7), (80, 0.8), (90, 0.9),
    )

    def __init__(self,lines):
        self.lines = lines

    def __call__(self, groups):
        for key, records in groups:
            plat = key.plat
            serv = key.service
            obj = key.distr_obj

            lines = self.lines[plat + '&' + serv + '&' + obj]
            if lines == 0:
                continue

            # cutoffs: level -> record position at which the level is sampled.
            # levels_at: position -> every level sampled there.  Several levels
            # can share one position in small groups (e.g. lines=5 gives
            # q20 == q30 == 1); each of them must be filled in, which an
            # if/elif chain would not do.
            cutoffs = {}
            levels_at = {}
            for level, fraction in self._FRACTIONS:
                cut = int(lines * fraction)
                cutoffs[level] = cut
                if cut != 0:
                    levels_at.setdefault(cut, []).append(level)

            sums = dict.fromkeys(cutoffs, 0)
            quantiles = dict.fromkeys(cutoffs, -1)

            seen = 0
            sum_data = 0
            for rec in records:
                # Sample *before* adding the current record, so that sum_data
                # covers exactly the first `seen` records.
                for level in levels_at.get(seen, ()):
                    sums[level] = sum_data
                    quantiles[level] = round(float(sum_data) / float(seen), 2)
                sum_data += rec.coverage
                seen += 1

            mean = round(float(sum_data) / float(lines), 2)

            # Levels whose cut-off truncated to zero have no prefix to average
            # over; report the group mean for them.
            for level, cut in cutoffs.items():
                if cut == 0:
                    quantiles[level] = mean

            fields = dict(plat=plat, service=serv, distr_obj=obj,
                          lines=lines, mean=mean, sum_data=sum_data)
            for level, cut in cutoffs.items():
                fields['q%d' % level] = cut
                fields['quantille%d' % level] = quantiles[level]
                fields['sum%d' % level] = sums[level]

            yield Record(**fields)

# Application id -> service name used in the reports.  Built once at import
# time instead of on every call, since map_name runs for every log record.
#
# NOTE(review): the original literal listed three ids twice; Python keeps the
# last value, so those effective values are preserved here and the shadowed
# ones were dropped:
#   'ru.yandex.weatherplugin'    : 'Weather'   -> 'Weather (?)'
#   'ru.yandex.mobile.transport' : 'Transport' -> 'Транспорт'
#   'ru.yandex.yandexbus'        : 'Transport' -> 'Транспорт'
# Confirm that these are the intended names.
_APP_NAMES = {
    'ru.yandex.mobile..realty': '.realty',
    'ru.yandex.direct.inhouse': 'direct.inhouse',
    'ru.yandex.disk': 'Disk',
    'ru.yandex.disk.inhouse': 'disk.inhouse',
    'ru.yandex.yandexbus.inhouse.debug': 'inhouse.debug',
    'ru.yandex.direct.inhouse.globus': 'inhouse.globus',
    'ru.yandex.mobile.metro.inhouse.new': 'inhouse.new',
    'com.yandex.mobile.job': 'Job',
    'ru.yandex.mobile.rabota': 'Job',
    'com.yandex.mobile.job.dev': 'job.dev',
    'ru.kinopoisk': 'Kinopoisk',
    'ru.kinopoisk.dev': 'kinopoisk.dev',
    'ru.yandex.mail': 'Mail',
    'ru.yandex.mail.debug': 'mail.debug',
    'ru.yandex.traffic': 'Maps',
    'ru.yandex.yandexmaps': 'Maps',
    'ru.yandex.ymarket': 'Market',
    'ru.yandex.metro': 'Metro',
    'ru.yandex.mobile.metro': 'Metro',
    'ru.yandex.mobile.metro.inhouse': 'metro.inhouse',
    'P01yandex.mobile.navigator': 'mobile.navigator',
    'ru.yandex.mobile.music': 'Music',
    'ru.yandex.mobile.music.inhouse': 'music.inhouse',
    'ru.yandex.mobile.navigator': 'Navigator',
    'ru.yandex.yandexnavi': 'Navigator',
    'ru.yandex.mobile.navigator.inhouse': 'navigator.inhouse',
    'ru.yandex.mobile.navigator.sandbox': 'navigator.sandbox',
    'ru.yandex.parking.sandbox': 'parking.sandbox',
    'ru.yandex.promolib.sample': 'promolib.sample',
    'ru.yandex.promolib.samplesl': 'promolib.samplesl',
    'ru.yandex.mobile.rabota.inhouse': 'rabota.inhouse',
    'com.yandex.mobile.realty': 'Realty',
    'ru.yandex.mobile.realty': 'Realty',
    'com.yandex.mobile.realty.dev': 'Realty Dev',
    'ru.yandex.mobile.realty.inhouse': 'realty.inhouse',
    'ru.yandex.promolib.sample.sl': 'sample.sl',
    'ru.yandex.test.promolib': 'test.promolib',
    'ru.yandex.test.promolib_': 'test.promolib_',
    'ru.yandex.test.promolib1': 'test.promolib1',
    'ru.yandex.traffic.inhouse': 'traffic.inhouse',
    'ru.yandex.traffic.sandbox': 'traffic.sandbox',
    'ru.yandex.rasp': 'Trains',
    'ru.yandex.mobile.transport.inhouse': 'transport.inhouse',
    'com.voltmobi.ymarket': 'voltmobi.ymarket',
    'ru.yandex.mobile.weather-v2': 'Weather',
    'ru.yandex.mobile.weather-v2.inhouse': 'weather-v2.inhouse',
    'ru.yandex.weatherplugin': 'Weather (?)',
    'ru.yandex.disl': 'yandex.disl',
    'ru.yandex.meow': 'yandex.meow',
    'com.yandex.NativeTemplatesTableViewExample': 'yandex.NativeTemplatesTableViewExample',
    'com.globus.yandex.weather': 'yandex.weather',
    'ru.yandex.yandexbus.inhouse': 'yandexbus.inhouse',
    'ru.yandex.yandexmaps.clone': 'yandexmaps.clone',
    'ru.yandex.yandexmaps.debug': 'yandexmaps.debug',
    'ru.yandex.yandexmaps.inhouse': 'yandexmaps.inhouse',
    'ru.yandex.yandexmaps.klone': 'yandexmaps.klone',
    'ru.yandex.yandexnavi.inhouse': 'yandexnavi.inhouse',
    'ru.yandex.yandexnavi.sandbox': 'yandexnavi.sandbox',
    'ru.yandex.direct': 'Директ',
    'ru.yandex.mobile.transport': 'Транспорт',
    'ru.yandex.yandexbus': 'Транспорт',
}


def map_name(AppID):
    """Return the report service name for an application id.

    Ids that are not in the lookup table (including ``None`` and the empty
    string) are returned unchanged.
    """
    return _APP_NAMES.get(AppID) or AppID

# Base64-encoded Blowfish key that blow_decrypt uses to decrypt the IV block
# at the start of an encrypted payload.
# NOTE(review): secret material is hard-coded in source - consider loading it
# from a secure store instead.
iv_key = "pWAnOKnfLKdjE2Ej16jQcw=="
# Base64-encoded Blowfish key that blow_decrypt uses to decrypt the payload body.
key = "yTbJ8bAI5qQrWEmwah2xYw=="


def blow_decrypt(source):
    """Decrypt a URL-safe base64 Blowfish payload and return it as text.

    The decoded payload is split into its first cipher block, which holds an
    encrypted IV, and the remaining bytes, which hold the encrypted data.
    The IV block is decrypted with ``iv_key`` (CBC, fixed IV ``"arcadia+"``);
    the result is used as the CBC IV for decrypting the data with ``key``.
    Trailing NUL padding bytes are removed before decoding as UTF-8.

    Parameters
    ----------
    source : str
        Base64 text using the URL-safe alphabet (``-`` and ``_``).

    Returns
    -------
    The decrypted payload decoded from UTF-8; empty if the plaintext is
    empty or consists only of padding.
    """
    bs = Blowfish.block_size
    raw = base64.b64decode(source.replace('-', '+').replace('_', '/'))

    iv_cipher = Blowfish.new(base64.b64decode(iv_key), Blowfish.MODE_CBC, "arcadia+")
    iv_decrypted = iv_cipher.decrypt(raw[:bs])

    data_cipher = Blowfish.new(base64.b64decode(key), Blowfish.MODE_CBC, iv_decrypted)
    data_decrypted = data_cipher.decrypt(raw[bs:])

    # rstrip removes the NUL padding without indexing [-1], so an empty or
    # all-padding plaintext no longer raises IndexError, and the comparison
    # also works where indexing bytes yields an int.
    return data_decrypted.rstrip(b'\x00').decode('utf8')

def extract_testids(adata):
    """Return the test ids stored in an encrypted payload as a comma-separated string.

    ``adata`` is decrypted with ``blow_decrypt`` and parsed as a JSON object.
    The values under its ``'test-ids'`` key are converted to integers,
    de-duplicated and sorted in ascending order, then joined with ``','``.

    Returns an empty string when the key is missing, null or empty.
    Raises ``ValueError`` if the payload is not valid JSON or an id is not
    an integer.
    """
    payload = json.loads(blow_decrypt(adata))
    # .get() yields None for a missing key; fall back to an empty collection
    # instead of failing with "NoneType is not iterable".
    raw_ids = payload.get('test-ids') or ()
    unique_ids = sorted(set(int(x) for x in raw_ids))
    return ','.join(str(test_id) for test_id in unique_ids)




username = 'ensuetina'
date_format = '%Y-%m-%d'

# Select the cluster and register the $job_root path template under which
# every table of this script is stored.
cluster = clusters.yt.Hahn(pool='search-research_ensuetina').env(
    templates={
        'job_root': 'home/search-research/' + username + '/DISTRIBUTION_METRIC/PROMOLIB',
    }
)

# One 'YYYY-MM-DD' string per calendar day of the reporting window.
dates = [day.strftime(date_format) for day in pd.date_range('2016-09-22', '2016-10-01')]

# Each day is processed on its own: a first job builds the per-device coverage
# table and the group sizes, a second job turns them into the report table,
# and the report is then published to Statface.
for date in dates:
    # Progress output: the day currently being processed.
    print date
    job = cluster.job()

    # Mobile Metrika log table for this day.
    log = job.table('statbox/metrika-mobile-log/' + date)


    ##########################
    ######### HITS ###########
    ##########################

    # Number of EVENT_START events per (device_id, service, date); `service`
    # is derived from the AppID through map_name.
    hits_service = log.qb2(log = 'metrika-mobile-log',
                           fields = ['device_id','event_type','date',
                                     se.dictitem('AppID',from_='parsed_log_line'),
                                     se.custom('service',map_name,'AppID')
                                    ],
                           filters = [sf.defined('event_type','device_id'),
                                      sf.equals('event_type','EVENT_START')
                                     ]
                          ).groupby('device_id','service','date').aggregate(hits=na.count())

    # The same hits summed over all services of a device, labelled with the
    # pseudo-service '_total_'.
    hits_total_service = hits_service.groupby('device_id',
                                              'date',
                                             ).aggregate(hits=na.sum('hits')).project(ne.all(),
                                                                                      service=ne.const('_total_')
                                                                                     )


    # Per-service and total rows together, stored as all_hits.
    hits = job.concat(hits_service,hits_total_service).put('$job_root/all_hits')
    #break

    # Sanity-check table: per (service, date), the total hits and the number of
    # hits_service rows (one row per device). Written with append=True, so it
    # presumably accumulates across the days of the loop.
    check = hits_service.groupby('service',
                                 'date'
                                ).aggregate(hits=na.sum('hits'),
                                            devices=na.count()
                                           ).put('$job_root/check_hits',append=True)


    ##########################
    ######## BANNERS #########
    ##########################

    # Number of 'display' events whose raw_event_value mentions 'campaign_id',
    # per (service, device_id, date).
    banners_all = log.qb2(log = 'metrika-mobile-log',
                    fields = ['device_id','event_name','raw_event_value','date',
                              se.dictitem('AppID',from_='parsed_log_line'),
                              se.custom('service',map_name,'AppID')
                             ],
                    filters = [
                                sf.contains('raw_event_value','campaign_id'),
                                sf.equals('event_name','display')
                              ]
                   ).groupby('service',
                             'device_id',
                             'date',
                            ).aggregate(shows=na.count())


    # Shows summed over all services of a device, labelled '_total_' to match
    # the corresponding hits rows.
    distrib_total_service = banners_all.groupby('device_id',
                                                'date'
                                               ).aggregate(shows=na.sum('shows')).project(ne.all(),
                                                                                          service = ne.const('_total_')
                                                                                         )

    distrib = job.concat(banners_all,distrib_total_service).put('$job_root/distrib_shows')


    ##########################
    ########## JOIN ##########
    ##########################

    # Inner join: only (device_id, service, date) combinations that have both
    # shows and hits are kept.
    j1 = distrib.join(hits,by=('device_id','service','date'),type='inner')#.put('$job_root/joined_uids')

    # coverage = shows / hits rounded to 2 decimals; neg_cov is its negation
    # and serves as the sort key. Rows with coverage above 1.0 (more shows than
    # hits) are dropped, and the constant labels plat='touch' and
    # distr_obj='promolib_banner' are attached.
    # NOTE(review): sorting by neg_cov presumably puts the highest coverage
    # first - confirm the sort direction.
    joined_uids = j1.project(ne.all(),
                             coverage = ne.custom(lambda x,y: round(float(x)/float(y),2),'shows','hits'),
                             neg_cov = ne.custom(lambda x,y: -1*round(float(x)/float(y),2),'shows','hits')
                            ).filter(nf.custom(lambda x: float(x)<=1.0, 'coverage')
                                    ).project(ne.all(),plat=ne.const('touch'),distr_obj=ne.const('promolib_banner')).sort('neg_cov').put('$job_root/joined_coverage')

    # Group sizes per (service, plat, distr_obj). `lines` and `uids` are both
    # na.count(), so they carry the same value; `shows` is the total shows.
    aggr = joined_uids.groupby('service','plat','distr_obj').aggregate(lines = na.count(),
                                                                       uids = na.count(),
                                                                       shows = na.sum('shows')
                                                                      ).put('$job_root/lines')

    # Run the first job so that the tables written above exist.
    job.run()

    # Read the group sizes back into a local dict for get_percentilles.
    records = cluster.read('$job_root/lines')

    lines = {}

    for rec in records:
        plat = rec.plat
        serv = rec.service
        obj = rec.distr_obj

        # Same 'plat&service&distr_obj' key format that get_percentilles builds.
        k = plat + '&' + serv + '&' + obj

        lines[k] = rec.lines

    # Second job: percentile report built from the tables of the first job.
    job = cluster.job()

    joined_uids = job.table('$job_root/joined_coverage')

    # Average number of shows per row of the group (shows / uids, 2 decimals).
    spu = job.table('$job_root/lines').project('plat','service','distr_obj',
                                               shows_per_user = ne.custom(lambda x,y: round(float(x)/float(y),2),'shows','uids')
                                              )

    #result = joined_uids.groupby('plat').sort('neg_cov').reduce(get_percentilles(dlines,tlines)).project(ne.all(),
    #                                                                                                     date=ne.const(date)
    #                                                                                                    ).put('$job_root/report_full',append=True)
    # Reduce every (plat, service, distr_obj) group, sorted by neg_cov, with
    # get_percentilles and stamp each output row with the current date.
    result_filtered = joined_uids.groupby('plat',
                                          'service',
                                          'distr_obj'
                                         ).sort('neg_cov').reduce(get_percentilles(lines)).project(ne.all(),
                                                                                                   date=ne.const(date)
                                                                                                  ).put('$job_root/report_filtered')

    # Left join keeps every reduced row even when it has no shows_per_user.
    rep = result_filtered.join(spu,by=('plat','service','distr_obj'),type='left')

    # Rows without shows_per_user get the placeholder -1; the others pass
    # through unchanged.
    rep1 = rep.filter(sf.defined('shows_per_user'))
    rep2 = rep.filter(sf.not_(sf.defined('shows_per_user'))).project(ne.all(),
                                                                     shows_per_user = ne.const(-1)
                                                                    )

    # Final report rows: fields renamed to the report's column names
    # (date -> fielddate, plat -> platform, distr_obj -> product,
    # quantilleN -> qN), empty strings replaced with '-', and the constants
    # where='app' and testid='no test-id' added.
    # NOTE(review): .unique(...) presumably drops rows that repeat in all the
    # listed fields - confirm.
    report = job.concat(rep1,rep2).project(service = ne.custom(lambda x: x if x != '' else '-','service'),
                                           mean = ne.custom(lambda x: x if x != '' else '-','mean'),
                                           shows_per_user = ne.custom(lambda x: x if x != '' else '-','shows_per_user'),
                                           fielddate = ne.custom(lambda x: x if x != '' else '-','date'),
                                           where = ne.const('app'),
                                           platform = ne.custom(lambda x: x if x != '' else '-','plat'),
                                           product = ne.custom(lambda x: x if x != '' else '-','distr_obj'),
                                           testid = ne.const('no test-id'),
                                           q10 = ne.custom(lambda x: x if x != '' else '-','quantille10'),
                                           q20 = ne.custom(lambda x: x if x != '' else '-','quantille20'),
                                           q30 = ne.custom(lambda x: x if x != '' else '-','quantille30'),
                                           q40 = ne.custom(lambda x: x if x != '' else '-','quantille40'),
                                           q50 = ne.custom(lambda x: x if x != '' else '-','quantille50'),
                                           q60 = ne.custom(lambda x: x if x != '' else '-','quantille60'),
                                           q70 = ne.custom(lambda x: x if x != '' else '-','quantille70'),
                                           q80 = ne.custom(lambda x: x if x != '' else '-','quantille80'),
                                           q90 = ne.custom(lambda x: x if x != '' else '-','quantille90')
                                          ).unique('fielddate','where','platform','service','product',
                                                   'q10','q20','q30','q40','q50','q60','q70','q80',
                                                   'q90','mean','shows_per_user').put('$job_root/report')


    # Run the second job.
    job.run()

    # NOTE(review): username and password are empty strings here - they
    # presumably have to be filled in before publishing can succeed.
    client = ns.StatfaceClient(
        proxy = 'upload.stat.yandex-team.ru',
        username = '',
        password = ''
	)

    # From here on `report` refers to the Statface report object, not to the
    # stream built above.
    report = ns.StatfaceReport().path('Yandex_RU/Others/distribution_metric/report_v1').scale('daily')

    report = report.client(client)

    # Upload the rows of the report table written by the second job.
    report = report.data(cluster.read('$job_root/report'))

    report.publish()
