#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
sys.path.insert(1, os.path.join(sys.path[0], '../..'))

from datetime import date, datetime, timedelta
from nile.api.v1 import (
    aggregators as na,
    clusters,
    extractors as ne,
    filters as nf,
    statface as ns
)
from nile.api.v1.datetime import date_range
from nile.files import LocalFile
from qb2.api.v1 import filters as qf, extractors as qe
from common import (
    APPMETRIKA_LOG_PATH,
    JOB_ROOT,
    LAUNCHER_APIKEY,
    USERNAME,
    get_clid1,
    validate_unicode,
    get_stat_client)


# Length of the recalculated window, in weeks.
weeks_recalc_back = 2
# strptime format of the date strings parsed by get_retention_day().
date_format = '%Y-%m-%d'

# The window ends yesterday and starts `weeks_recalc_back` weeks before that.
date_end = date.today() - timedelta(days=1)
date_start = date_end - timedelta(weeks=weeks_recalc_back)
# Every day of the window; presumably date strings because of
# stringify=True -- confirm against nile's date_range.
dates_list = list(date_range(date_start, date_end, step=1, stringify=True))

# The same window as one brace range, e.g. '{2017-01-01..2017-10-31}';
# it is handed to the cluster environment below as the `date` template.
magic_date_range_string = '{%s..%s}' % (
    date_start.isoformat(), date_end.isoformat()
)
output_table = '$job_root/retention'
# Raises KeyError when YT_TOKEN is missing from the environment.
yt_token = os.environ['YT_TOKEN']

# sys.path[1] is the '../..' entry inserted at the top of this script, so
# this points at the shared common.py shipped with the job.
common_path = os.path.join(sys.path[1], 'common.py')
cluster = clusters.Hahn(
    pool='{}'.format(USERNAME),
    token=yt_token
).env(
    templates={'job_root': JOB_ROOT, 'date': magic_date_range_string},
    packages=[LocalFile(common_path)]
)
job = cluster.job()


def get_retention_day(event_date, install_date):
    """Return how many days after ``install_date`` the ``event_date`` falls.

    Both arguments are date strings in the module-level ``date_format``.
    The result is negative when the event precedes the install.

    Raises:
        ValueError: if either string does not match ``date_format``.
    """
    # The event date is parsed first, then the install date.
    event_moment, install_moment = (
        datetime.strptime(raw_date, date_format)
        for raw_date in (event_date, install_date)
    )
    elapsed = event_moment - install_moment
    return elapsed.days



def prepare_sample(job):
    """Assemble the retention pipeline on ``job``, run it and return its output.

    The pipeline reads the table at ``APPMETRIKA_LOG_PATH`` through qb2,
    derives one stream of installs and one stream of daily audience, joins
    them by ``device_id``, computes ``retention_day`` for every joined row
    and counts distinct devices per (``fielddate``, ``model``, ``clid``)
    for retention days 0, 1, 2, 3 and 7.  Groups whose ``installs`` count
    is not greater than 10 are dropped; the rest is written to
    ``output_table``.

    Args:
        job: job object (created by ``cluster.job()`` in this script); the
            operations are attached to it and ``job.run()`` is called
            before returning.

    Returns:
        The stream object returned by the final ``.put(output_table)``.
    """
    source = job.table(APPMETRIKA_LOG_PATH)

    # ---- Stage 1: extract and pre-filter raw events ----------------------
    extracted_fields = [
        'device_id',
        'event_date',
        'event_type',
        'session_type',
        qe.log_field('Clids_Names', default="[]").rename('clids_names'),
        qe.log_field('Clids_Values', default="[]").rename('clids_values'),
        qe.log_field('Model', default='unknown'),
        qe.log_field('Manufacturer', default='unknown'),
        # "<manufacturer> - <model>" label built from the two fields above.
        qe.custom(
            'manufacturer_model',
            lambda man, model: ' - '.join([man, model]),
            'Manufacturer', 'Model'
        ),
    ]
    event_filters = [
        qf.equals('api_key_str', LAUNCHER_APIKEY),
        qf.one_of('event_type', {
            'EVENT_CLIENT',
            'EVENT_STATBOX'
        }),
        qf.equals('session_type', 'SESSION_FOREGROUND'),
        # Keep only events inside the recalculated date window.
        qf.compare('event_date', '>=', dates_list[0]),
        qf.compare('event_date', '<=', dates_list[-1]),
    ]
    events = source.qb2(
        log='metrika-mobile-log',
        fields=extracted_fields,
        filters=event_filters
    )

    # ---- Stage 2: one row per (device, install date) ---------------------
    # NOTE(review): 'event_name' is not among the fields requested from qb2
    # above -- confirm it is actually available to this filter.
    first_launches = events.filter(
        qf.equals('event_name', 'first_app_launch')
    )
    install_rows = first_launches.project(
        'device_id',
        install_date='event_date'
    )
    installs = install_rows.unique('device_id', 'install_date').checkpoint(
        'installs',
        path='home/advisor/chikachoff/tmp/retention_installs_checkpoint'
    )

    # ---- Stage 3: one row per (device, active date) ----------------------
    activity = events.project(
        'device_id',
        fielddate='event_date',
        clid1=ne.custom(get_clid1, 'clids_names', 'clids_values'),
        manmodel=ne.custom(validate_unicode, 'manufacturer_model')
    )
    # Rows whose clid1 equals the literal 'Empty' are discarded.
    with_clid = activity.filter(nf.not_(nf.equals('clid1', 'Empty')))
    audience = with_clid.unique('device_id', 'fielddate').checkpoint(
        'audience',
        path='home/advisor/chikachoff/tmp/retention_audience_checkpoint'
    )

    # ---- Stage 4: attach install dates and compute the retention day -----
    matched = audience.join(installs, by='device_id', type='inner')
    with_retention = matched.project(
        ne.all(),
        retention_day=ne.custom(
            get_retention_day,
            'fielddate', 'install_date'
        )
    ).checkpoint(
        'joined',
        path='home/advisor/chikachoff/tmp/retention_joined_checkpoint'
    )

    # Activity that happened before the install date is ignored.
    after_install = with_retention.filter(
        nf.custom(lambda x: x >= 0, 'retention_day')
    )
    # From here on `fielddate` carries the install date, not the event date.
    # Presumably unfold_with_total also emits a '_total_' row per dimension
    # -- confirm against the qb2 documentation.
    sliced = after_install.project(
        'device_id', 'retention_day',
        qe.unfold_with_total('clid', 'clid1', total='_total_'),
        qe.unfold_with_total('model', 'manmodel', total='_total_'),
        fielddate='install_date'
    ).checkpoint(
        'preaggregate',
        path='home/advisor/chikachoff/tmp/retention_preaggr_checkpoint'
    )

    # ---- Stage 5: distinct devices per slice and retention day -----------
    grouped = sliced.groupby('fielddate', 'model', 'clid')
    # Output column -> retention day whose distinct devices it counts.
    day_by_column = (
        ('installs', 0),
        ('day1', 1),
        ('day2', 2),
        ('day3', 3),
        ('day7', 7),
    )
    measures = {
        column: na.count_distinct(
            'device_id',
            predicate=nf.equals('retention_day', day)
        )
        for column, day in day_by_column
    }
    aggregated = grouped.aggregate(**measures)

    # Only slices with more than 10 installs are stored.
    big_enough = aggregated.filter(nf.custom(lambda x: x > 10, 'installs'))
    result = big_enough.put(output_table)

    job.run()
    return result

# Build and run the pipeline; `result` is the stream written to output_table.
result = prepare_sample(job)

# Report layout: daily scale, sliced by date, clid and model, with one
# integer measure per aggregated column of the pipeline output.
report = ns.StatfaceReport() \
    .path('Mobile_Soft_Launcher/retention_slices') \
    .title('retention_slices') \
    .scale('daily')
report = report.dimensions(
    ns.Date('fielddate'),
    ns.StringSelector('clid'),
    ns.StringSelector('model')
)
report = report.measures(
    *(
        ns.Integer(column)
        for column in ('installs', 'day1', 'day2', 'day3', 'day7')
    )
)

client = get_stat_client()

# Read the pipeline output and publish it through the Statface client.
report.client(client).data(result.read()).publish()
