#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
Apprec Experiments Statface
"""

import datetime
import logging
import os
from datetime import date, timedelta

from bson import ObjectId
from nile.api.v1 import clusters, filters as nf, aggregators as na, statface as ns, files as nfl
from nile.api.v1.datetime import date_range
from qb2.api.v1 import filters as qf, extractors as qe

# logging.basicConfig(level=logging.WARN)
from common import get_stat_client

# Module logger: records of level WARNING and above are appended to
# ``log.txt`` located in the same directory as this script.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

formatter = logging.Formatter('%(asctime)s %(levelname)s %(filename)s %(message)s')
_handler = logging.FileHandler(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'log.txt')
)
_handler.setFormatter(formatter)
logger.addHandler(_handler)

# Value matched against ``api_key_str`` when filtering the raw log.
APIKEY = '37460'
# Event names that are kept from the raw log.
EVENTNAMES = {'rec_view', 'rec_click', 'rec_install'}

# The report is recalculated for the last WEEKS_RECALC_BACK weeks,
# ending with yesterday.
WEEKS_RECALC_BACK = 8
DATE_END = date.today() - timedelta(days=1)
DATE_START = DATE_END - timedelta(weeks=WEEKS_RECALC_BACK)
DATES_LIST = list(date_range(DATE_START, DATE_END, step=1, stringify=True))
# Brace range substituted for ``@date`` in the table path,
# e.g. '{2017-01-01..2017-10-31}'.
MAGIC_DATE_RANGE_STRING = '{{{}..{}}}'.format(DATE_START.isoformat(), DATE_END.isoformat())

JOB_ROOT = 'home/advisor/chikachoff/apprec_exp'
OUTPUT_TABLE = '$job_root/data'
RAW_LOG_TABLE = '//logs/yandex-phone-metrika-mobile-log/1d/@date'
USERNAME = 'chikachoff'
# The token is mandatory: a missing YT_TOKEN variable raises KeyError here.
YT_TOKEN = os.environ['YT_TOKEN']

# Cluster handle with the path templates used by the table names above.
CLUSTER = (
    clusters
    .Hahn(pool='search-research_' + USERNAME, token=YT_TOKEN)
    .env(templates={'job_root': JOB_ROOT, 'date': MAGIC_DATE_RANGE_STRING})
)
JOB = CLUSTER.job()
LOG = JOB.table(RAW_LOG_TABLE)


def get_adnetwork(event_value):
    """Return the ad network prefix of the event's ``offer_id``.

    The prefix is the part of ``offer_id`` before the first ``'_'``.
    A missing or falsy ``offer_id`` (and the literal ``'empty'``) yields
    ``'empty'``.
    """
    raw_offer_id = event_value.get('offer_id')
    if not raw_offer_id or raw_offer_id == 'empty':
        return 'empty'
    return raw_offer_id.split('_')[0]


def get_generation_date(event_value):
    """Return the generation date encoded in the event's ``impression_id``.

    The third ``':'``-separated field of ``impression_id`` is parsed as a
    BSON ``ObjectId`` and its generation time is formatted as
    ``'%Y-%m-%d'``.

    Returns:
        The date string, ``'empty impression_id'`` when the id is missing
        or falsy, or ``'InvalidId'`` when it cannot be parsed.
    """
    impression_id = event_value.get('impression_id')
    if not impression_id:
        return 'empty impression_id'
    try:
        object_id = ObjectId(impression_id.split(':')[2])
        return object_id.generation_time.date().strftime('%Y-%m-%d')
    except Exception:
        # Any malformed id (too few fields, non-string value, invalid
        # ObjectId) maps to the sentinel. Unlike a bare ``except:``, this
        # lets KeyboardInterrupt and SystemExit propagate.
        return 'InvalidId'


def get_age(event_date, generation_date):
    """Return the number of whole days between generation and event dates.

    Both arguments are ``'%Y-%m-%d'`` strings. If either string does not
    match that format, the sentinel ``99999`` is returned.
    """
    date_format = '%Y-%m-%d'
    parse = datetime.datetime.strptime
    try:
        delta = parse(event_date, date_format) - parse(generation_date, date_format)
    except ValueError:
        return 99999
    return delta.days

def get_server_version(event_value):
    """Return the seventh ``':'``-separated field of ``impression_id``.

    Falls back to ``'empty'`` when the id is missing, is not a string, or
    has fewer than seven fields.
    """
    try:
        impression_id = event_value.get('impression_id')
        fields = impression_id.split(':')
        version = fields[6]
    except (IndexError, AttributeError):
        version = 'empty'
    return version


def get_experiment(event_value):
    """Return the second ``':'``-separated field of ``impression_id``.

    Returns ``''`` when the id is missing, is not a string, or contains no
    ``':'``. Previously the inline lambda raised IndexError/AttributeError
    on such values, which aborted the job. Rows with such ids get the
    ``99999`` age sentinel and are removed by the age filter below, so the
    fallback value never reaches the report.
    """
    try:
        return event_value.get('impression_id', ':').split(':')[1]
    except (IndexError, AttributeError):
        return ''


# Pipeline: parse the raw log, derive the report dimensions from each
# event, keep events aged 0..9 days, and count views/clicks/installs per
# dimension combination.
RECORDS = LOG.qb2(
    log='metrika-mobile-log',
    fields=[
        'event_date',
        'event_name',
        qe.mobile.event_value(name='event_value'),
    ],
    filters=[
        qf.default_filtering('metrika-mobile-log'),
        qf.equals('event_type', 'EVENT_CLIENT'),
        qf.equals('api_key_str', APIKEY),
        qf.one_of('event_name', EVENTNAMES),
        qf.one_of('event_date', DATES_LIST),
    ]) \
    .project(
        'event_name',
        qe.custom('adnetwork_', get_adnetwork, 'event_value'),
        qe.custom('generation_date', get_generation_date, 'event_value'),
        qe.custom('experiment_', get_experiment, 'event_value'),
        qe.custom('server_version_', get_server_version, 'event_value'),
        fielddate='event_date',
        # get_generation_date needs the bson package where the job runs.
        files=[nfl.Package('bson', path='.')]
    ) \
    .project(
        'event_name', 'adnetwork_', 'experiment_', 'fielddate', 'server_version_',
        qe.custom('age_', get_age, 'fielddate', 'generation_date'),
    ) \
    .filter(
        # Also drops the 99999 sentinel that get_age returns for
        # unparsable generation dates.
        nf.custom(lambda x: x < 10 and x >= 0, 'age_')
    ) \
    .project(
        'event_name', 'fielddate',
        # NOTE(review): presumably each record is emitted both under its own
        # value and under '_total_' for every dimension - confirm in qb2 docs.
        qe.unfold_with_total('adnetwork', 'adnetwork_', total='_total_'),
        qe.unfold_with_total('age', 'age_', total='_total_'),
        qe.unfold_with_total('experiment', 'experiment_', total='_total_'),
        qe.unfold_with_total('server_version', 'server_version_', total='_total_')
    ) \
    .groupby('adnetwork', 'age', 'experiment', 'fielddate', 'server_version') \
    .aggregate(
        views=na.count(predicate=qf.equals('event_name', 'rec_view')),
        clicks=na.count(predicate=qf.equals('event_name', 'rec_click')),
        installs=na.count(predicate=qf.equals('event_name', 'rec_install'))
    ) \
    .sort('fielddate', 'views') \
    .put(OUTPUT_TABLE)
JOB.run()

# Statface report configuration: path, title and daily scale, followed by
# the report dimensions and measures.
REPORT = ns.StatfaceReport() \
    .path('Mobile_Soft_Launcher/apprec_experiments') \
    .title('apprec experiments') \
    .scale('daily') \
    .dimensions(
        ns.Date('fielddate').replaceable(),
        ns.StringSelector('adnetwork').replaceable(),
        ns.StringSelector('age').replaceable(),
        ns.StringSelector('experiment').replaceable(),
        ns.StringSelector('server_version').replaceable(),
    ) \
    .measures(
        # Each measure declares its Statface view type and its title.
        ns.Integer('views').title('views'),
        ns.Integer('clicks').title('clicks'),
        ns.Integer('installs').title('installs'),
    )

# Client used for publishing.
client = get_stat_client()

# Bind the client to the report, attach the computed rows and publish
# them to Statface.
REPORT.client(client).data(RECORDS.read()).publish()

# Log success. WARNING level is used so the record passes the logger's
# WARNING threshold; ``warning`` replaces the deprecated ``warn`` alias.
logger.warning('SUCCESS')
