#!/usr/bin/env python
# -*- coding: utf-8 -*-
from argparse import ArgumentParser
from datetime import (
    date,
    timedelta,
)
import os

from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    Record,
)
from passport.backend.profile import get_cluster
from passport.backend.profile.utils.helpers import from_str_to_date
from passport.backend.profile.utils.yt import ExclusiveLock
from qb2.api.v1 import (
    extractors as se,
    filters as sf,
)
from retrying import retry
from yt.wrapper.errors import YtIncorrectResponse


def is_registration_record(record):
    """
    Tell whether a log record is a final registration entry.

    A record qualifies when its action starts with ``account_register`` or
    ``account_create``, its consumer is ``passport`` and its entity is None.

    The result is meant to be used for its truthiness: a missing or empty
    action is returned as-is (falsy) rather than converted to ``False``.
    """
    action = record.action
    if not action:
        return action
    if not action.startswith(('account_register', 'account_create')):
        return False
    if not record.consumer == 'passport':
        return False
    # Only the final registration lines written to the log are of interest,
    # not the event=account_modification entity=... lines, hence the check.
    return record.entity is None


def is_within_experiment(track_id):
    """
    Return 1 when the track id belongs to the experiment, otherwise 0.

    A track id is in the experiment when its first two characters are
    ``00``, ``01`` or ``02``. A missing or empty track id gives 0.
    """
    if not track_id:
        return 0
    prefix = track_id[:2]
    return int(prefix in ['00', '01', '02'])


def track_reducer(groups):
    """
    Reduce log records grouped by ``track_id`` into one row per registration.

    For every group the last registration record (see
    ``is_registration_record``) and the surname from the last
    ``hidden_form_filled`` record are remembered. Groups without a
    registration record produce no output.

    Yields ``Record`` objects with the fields ``track_id``, ``uid``,
    ``reg_karma_value``, ``is_within_experiment``, ``is_hidden_form_filled``,
    ``is_surname_passed`` and ``surname``.
    """
    for key, records in groups:
        registration = None
        hidden_surname = None
        hidden_form_seen = False

        for rec in records:
            if is_registration_record(rec):
                registration = rec
            if rec.action == 'hidden_form_filled':
                hidden_form_seen = True
                hidden_surname = rec.surname

        if registration:
            track_id = key.track_id
            # The literal value 'empty' is treated the same as no surname.
            surname_passed = bool(hidden_surname and hidden_surname != 'empty')
            yield Record(
                track_id=track_id,
                uid=registration.uid,
                reg_karma_value=registration.karma,
                is_within_experiment=is_within_experiment(track_id),
                is_hidden_form_filled=int(hidden_form_seen),
                is_surname_passed=int(surname_passed),
                surname=hidden_surname,
            )


def uid_reducer(groups):
    """
    Reduce log records grouped by ``uid`` into one row of karma history.

    For every group the last registration record (see
    ``is_registration_record``) is remembered and the meaningful karma
    records are collected. Groups without a registration record produce no
    output.

    Yields ``Record`` objects with the fields ``uid``, ``karma_updates``
    (a list of ``[new_value, unixtime - registration_unixtime]`` pairs ordered
    by unixtime), ``is_karma_calculated`` and ``final_karma_value`` (the last
    updated value, or the registration karma when there were no updates).
    """
    for key, records in groups:
        registration = None
        karma_changes = []

        for rec in records:
            if is_registration_record(rec):
                registration = rec
            if rec.entity != 'karma':
                continue
            # The first karma record is garbage: the account has just been
            # registered and has not been checked yet.
            is_initial_value = rec.old == '-' and rec.new == '0'
            # The record written on account deletion is not needed either.
            is_account_removal = rec.new == '-'
            if not (is_initial_value or is_account_removal):
                karma_changes.append(rec)

        if not registration:
            continue

        karma_updates = [
            [change.new, change.unixtime - registration.unixtime]
            for change in sorted(karma_changes, key=lambda item: item.unixtime)
        ]
        if karma_updates:
            final_karma = karma_updates[-1][0]
        else:
            final_karma = registration.karma

        yield Record(
            uid=key.uid,
            karma_updates=karma_updates,
            is_karma_calculated=int(bool(karma_updates)),
            final_karma_value=final_karma,
        )


def _is_yt_incorrect_response(exception):
    """Retry predicate: true only for ``YtIncorrectResponse`` errors."""
    return isinstance(exception, YtIncorrectResponse)


# ``retry_on_exception`` is given a predicate rather than a tuple of exception
# types: older ``retrying`` releases call the value directly, so a tuple would
# raise TypeError on the first failure instead of retrying. A predicate works
# with every release.
@retry(stop_max_attempt_number=3, wait_fixed=5000, retry_on_exception=_is_yt_incorrect_response)
def robot_reg_stats(config, input_dir, output_dir, date_start, date_end, cluster_name=None):
    """
    PASSP-13618

    Build and run a job that collects statistics on robot registrations,
    i.e. registrations whose track contains a ``hidden_form_filled`` record.

    The job reads the ``passport-log`` tables ``input_dir/{date_start..date_end}``
    and writes two tables into ``output_dir``:

    * ``robot_reg_<date_start>_<date_end>`` - per-registration rows for robot
      registrations, joined with their karma history by uid;
    * ``robot_stat_<date_start>_<date_end>`` - histograms over the robot
      registrations joined with a histogram of the robot flag within the
      experiment.

    The whole run is done under an ``ExclusiveLock`` taken on
    ``config['yt']['robot_reg_info_daily_lock']``. On ``YtIncorrectResponse``
    the call is retried, up to 3 attempts in total with ``wait_fixed=5000``
    (milliseconds in ``retrying``) between them.

    :param config: configuration passed to ``ExclusiveLock`` and ``get_cluster``.
    :param input_dir: directory with the input log tables.
    :param output_dir: directory the result tables are written to.
    :param date_start: first table name of the range (formatted with ``%s``).
    :param date_end: last table name of the range (formatted with ``%s``).
    :param cluster_name: not used by this function; kept for callers.
    """
    with ExclusiveLock(config=config, lock_path=config['yt']['robot_reg_info_daily_lock']):
        cluster = get_cluster(config).env()

        job = cluster.job()
        passport_log = job.table(os.path.join(input_dir, '{%s..%s}' % (date_start, date_end)))

        # Keep registration / hidden-form records and karma changes only.
        filters = [
            sf.or_(
                sf.and_(
                    sf.defined('action'),
                    sf.custom(
                        lambda action: action.startswith(
                            ('account_register', 'account_create', 'hidden_form_filled'),
                        ),
                        'action',
                    ),
                ),
                sf.equals('entity', 'karma'),
            ),
        ]

        events = passport_log.qb2(
            log='passport-log',
            fields=[
                se.integer_log_field('uid').allow_override(),
                se.integer_log_field('unixtime').allow_override(),
                se.log_fields('entity', 'new', 'old', 'mode', 'action', 'track_id', 'karma', 'consumer', 'surname'),
            ],
            filters=filters,
        )

        # Per registration track: determine experiment membership and the
        # robot-registration flag.
        events_by_track = events.groupby(
            'track_id',
        ).reduce(
            track_reducer,
        )

        # Per uid: extract the deferred karma updates.
        events_by_uid = events.groupby(
            'uid',
        ).reduce(uid_reducer)

        events_joined = events_by_track.join(events_by_uid, by='uid', type='left')

        # Robot registrations feed both the dump and the statistics below,
        # so the filter is declared once and the stream is reused.
        robot_regs = events_joined.filter(
            nf.equals('is_hidden_form_filled', 1),
        )

        # Plain dump of the robot registration data.
        robot_regs_output = os.path.join(output_dir, 'robot_reg_%s_%s' % (date_start, date_end))
        robot_regs.put(robot_regs_output)

        # Robot registration statistics: karma values at registration, updated
        # karma values, whether the visible field was passed, whether karma
        # was calculated.
        robot_karma_stat = robot_regs.aggregate(
            robot_reg_karma_values=na.histogram('reg_karma_value'),
            robot_final_karma_values=na.histogram('final_karma_value'),
            is_surname_passed=na.histogram('is_surname_passed'),
            is_karma_calculated=na.histogram('is_karma_calculated'),
        )

        # Share of robots within the experiment, size of the experiment.
        robot_experiment_stat = events_by_track.filter(
            nf.equals('is_within_experiment', 1),
        ).aggregate(
            is_robot_reg_within_experiment=na.histogram('is_hidden_form_filled'),
        )

        # Both aggregates are joined without a key into the statistics table.
        robot_stat_output = os.path.join(output_dir, 'robot_stat_%s_%s' % (date_start, date_end))
        robot_karma_stat.join(
            robot_experiment_stat,
            type='full',
            by=tuple(),
            assume_unique=True,
        ).put(robot_stat_output)

        job.run()


def _parse_args(argv):
    """
    Parse the command-line arguments of the script.

    The positional ``date_start`` and ``date_end`` are both optional, are
    converted with ``from_str_to_date`` and default to yesterday. The
    ``--cluster`` option defaults to ``hahn``.

    :param argv: full argument vector; its first element is skipped.
    :return: the ``argparse`` namespace with ``date_start``, ``date_end``
        and ``cluster``.
    """
    # Computed once so that both defaults agree even when the call happens
    # right around midnight.
    yesterday = date.today() - timedelta(days=1)

    parser = ArgumentParser()
    for date_argument in ('date_start', 'date_end'):
        parser.add_argument(
            date_argument,
            type=from_str_to_date,
            nargs='?',
            default=yesterday,
        )
    # NOTE(review): with nargs='?' and no ``const``, a bare ``--cluster``
    # without a value yields None rather than 'hahn' - confirm this is intended.
    parser.add_argument(
        '--cluster',
        nargs='?',
        default='hahn',
    )
    return parser.parse_args(argv[1:])
