from nile.api.v1 import (
    aggregators as na,
    datetime as nd,
    extractors as ne,
    utils as nu,

    modified_schema,
)
from qb2.api.v1 import (
    extractors as qe,
    filters as qf,
    typing
)

# Column name -> qb2 type for the final dataset; make_dataset() passes this
# mapping to ``.cast(**DATASET_SCHEMA)`` as its last step.
DATASET_SCHEMA = dict(
    # Grouping keys used by _count_unit_fixes().
    entity_domain=typing.Unicode,
    date=typing.Unicode,
    payment=typing.Unicode,
    involvement=typing.Unicode,
    group=typing.Unicode,
    person=typing.Unicode,
    # Aggregated counters produced by _count_unit_fixes().
    fixed_count=typing.Int64,
    total_count=typing.Int64,
)


def _incorrect_unit_filter():
    """Build the qb2 filter that selects units considered incorrect.

    A record passes when at least one of these holds:

    * ``basic_incorrect`` is greater than ``basic_correct`` and
      ``last_expert_value`` is not defined;
    * ``last_expert_value``, parsed as Yson and converted to a string,
      equals ``'incorrect'``.

    Returns:
        The combined ``qf.or_`` filter object.
    """
    # Basic counters lean towards "incorrect" and no expert value is present.
    basic_incorrect_without_expert = qf.and_(
        qf.compare('basic_incorrect', '>', other_field='basic_correct'),
        qf.not_(qf.defined('last_expert_value')),
    )
    # The stored expert value itself is the string 'incorrect'.
    expert_marked_incorrect = qf.yql_custom(
        'CAST(Yson::ConvertToString(Yson::Parse($p0)) AS Utf8) = \'incorrect\'',
        'last_expert_value',
    )
    return qf.or_(basic_incorrect_without_expert, expert_marked_incorrect)


def _get_incorrect_units(unit_table, min_date, max_date):
    """Select incorrect units acted on within ``[min_date, max_date]``.

    Args:
        unit_table: stream of unit records; the fields read here are
            ``action_at``, ``action_by``, ``entity_domain``, ``fixed_at`` and
            those referenced by ``_incorrect_unit_filter()``.
        min_date: inclusive lower bound compared against ``action_at``.
        max_date: last day of the window; the upper bound actually used is
            the day after it, exclusive.

    Returns:
        A stream with ``entity_domain``, ``fixed_at``, ``date`` (the leading
        ``YYYY-MM-DD`` part of ``action_at``) and ``puid`` (taken from
        ``action_by``).
    """
    date_prefix_len = len('YYYY-MM-DD')

    # Comparing against the day after max_date with '<' keeps every
    # action_at value that starts with max_date inside the window.
    # NOTE(review): .decode() implies nd.next_day returns bytes here - confirm.
    exclusive_upper_bound = nd.next_day(max_date).decode()

    units_in_window = unit_table.filter(
        qf.compare('action_at', '>=', min_date),
        qf.compare('action_at', '<', exclusive_upper_bound),
        _incorrect_unit_filter(),
    )

    action_date = qe.custom(
        'date',
        lambda action_at: action_at[:date_prefix_len],
        'action_at',
    ).with_type(typing.Unicode)

    return units_in_window.project(
        'entity_domain',
        'fixed_at',
        action_date,
        puid='action_by',
    )


def _is_staff_uid(uid):
    is_test_yandex_team = uid >= 1100000000000000 and uid < 1110000000000000
    is_prod_yandex_team = uid >= 1120000000000000 and uid < 1130000000000000
    return is_test_yandex_team or is_prod_yandex_team


@nu.with_hints(modified_schema())
def _coalesce_person_mapper(records):
    """Fill in person attributes for records that have no ``person`` value.

    Records whose ``person`` is not None are yielded unchanged. For the rest,
    ``payment``, ``involvement``, ``group`` and ``person`` are overwritten
    with placeholders chosen by whether ``puid`` is a staff uid
    (see ``_is_staff_uid``).

    Args:
        records: iterable of records providing ``get``, item access and
            ``transform``.

    Yields:
        One output record per input record.
    """
    for record in records:
        if record.get('person') is not None:
            yield record
            continue

        # puid is only read once we know the person info is missing.
        puid = record['puid']
        if _is_staff_uid(puid):
            # Staff uid with no known person: keep the uid visible in the label.
            placeholder = {
                'payment': 'unknown',
                'involvement': 'staff',
                'group': 'unknown',
                'person': f'unknown ({puid})',
            }
        else:
            # Non-staff uid: all such users share one label.
            placeholder = {
                'payment': 'free',
                'involvement': 'user',
                'group': 'common',
                'person': 'common user',
            }
        yield record.transform(**placeholder)


def _add_puid_info(units, puid_info):
    """Attach person attributes to units and drop the ``puid`` column.

    Args:
        units: stream of units carrying ``date`` and ``puid``.
        puid_info: stream joined to ``units`` on ``(date, puid)``.

    Returns:
        A stream with every field except ``puid``, after
        ``_coalesce_person_mapper`` has filled in records with no ``person``.
    """
    # Left join: units without a match in puid_info are kept and get their
    # person attributes from the mapper below.
    joined = units.join(puid_info, by=('date', 'puid'), type='left')
    with_person = joined.map(_coalesce_person_mapper)
    return with_person.project(ne.all(exclude='puid'))


def _count_unit_fixes(units_with_puid_info):
    """Count total and fixed units per domain, date and person attributes.

    Args:
        units_with_puid_info: stream carrying the grouping fields listed
            below plus ``fixed_at``.

    Returns:
        A stream with one record per group holding ``total_count`` (all
        records in the group) and ``fixed_count`` (records with ``fixed_at``
        defined).
    """
    grouping_fields = (
        'entity_domain',
        'date',
        'payment',
        'involvement',
        'group',
        'person',
    )

    per_group_counts = units_with_puid_info.groupby(*grouping_fields).aggregate(
        fixed_count=na.count(qf.defined('fixed_at')),
        total_count=na.count(),
    )

    # NOTE(review): presumably ne.all('fixed_count') passes every field except
    # fixed_count, which is then re-added with 0 substituted for a missing
    # value - confirm against the nile/qb2 extractor docs.
    return per_group_counts.project(
        ne.all('fixed_count'),
        qe.coalesce('fixed_count', 'fixed_count', 0).with_type(typing.Int64),
    )


def make_dataset(min_date, max_date, unit_table, puid_info):
    """Build the dataset of fix counts for incorrect units.

    The pipeline selects incorrect units in the date window, attaches person
    attributes, aggregates the counts and casts the result to
    ``DATASET_SCHEMA``.

    Args:
        min_date: lower bound of the window, forwarded to
            ``_get_incorrect_units``.
        max_date: upper bound of the window, forwarded to
            ``_get_incorrect_units``.
        unit_table: source stream of units.
        puid_info: stream of person attributes, forwarded to
            ``_add_puid_info``.

    Returns:
        The aggregated stream cast to ``DATASET_SCHEMA``.
    """
    incorrect_units = unit_table.call(_get_incorrect_units, min_date, max_date)
    units_with_person = incorrect_units.call(_add_puid_info, puid_info)
    fix_counts = units_with_person.call(_count_unit_fixes)
    return fix_counts.cast(**DATASET_SCHEMA)
