from maps.wikimap.stat.tasks_payment.dictionaries.tariff.schema import TARIFF_RUB_PER_SEC

from nile.api.v1 import Record, aggregators, extractors

from yt.wrapper.ypath import ypath_join

from cyson import UInt


# YT directory with task tariff maps; make_report_job() appends the ISO date
# to it to get the table for a particular day.
_TASK_TARIFF_MAP_DIR = '//home/maps-qa/analytics/tasks_payment/dictionaries/task_tariff_map'
# YT directory with tracker logs; make_report_job() appends the ISO date to it.
_TRACKER_LOG_DIR = '//home/maps-qa/analytics/tasks_payment/tasks_logs/tracker_log'
# NOTE: despite the `_PATH` suffix, make_report_job() also appends the ISO date
# to this path, i.e. it is used as a directory of per-day tables.
_STAFF_DUMP_PATH = '//home/maps-qa/analytics/tasks_payment/dictionaries/staff_dump'

# First element of every tree path built in this module.
_TREE_ROOT = b'all'
# Path prefix for entities that have no known tree path (users without a login,
# tasks without `task_name_tree`); the entity id is appended as the last element.
_DEFAULT_TREE = [_TREE_ROOT, b'unknown']
# Value substituted for `seconds_per_task` when it is None (see _add_tariffs()).
_DEFAULT_SECONDS_PER_TASK = 0

# ACL entry that _make_acl() puts into `_acl` of every row.
_FULL_ACCESS_GROUP = b'full_access_group'


def _tree_name_from_path(path):
    return b'\t' + b'\t'.join(path) + b'\t'


def _tree_names_for_unknown_values(tree_name_col, value_col):
    def make_tree_names(records):
        for record in records:
            if record.get(tree_name_col) is not None:
                yield record
                continue

            value = record.get(value_col)
            if type(value) is str:
                value = value.encode('utf-8')
            elif type(value) is not bytes:
                value = str(value).encode('utf-8')

            tree = _DEFAULT_TREE + [value]

            for level in range(len(tree)):
                yield Record(
                    record,
                    {
                        tree_name_col: _tree_name_from_path(tree[0:level + 1])
                    }
                )

    return make_tree_names


def _make_user_name_path(record):
    '''
    Builds the tree path (a list of bytes) of the user the record refers to.

    Returns path:
    - [_TREE_ROOT, primary_department, 'last_name first_name (login)'] when
      the record has a login; or
    - _DEFAULT_TREE + [staff_uid] when there is no login, i.e. no information
      about the user; staff_uid is converted to bytes through str().
    '''
    login = record.get('login')
    if login is None:
        uid_as_bytes = str(record.staff_uid).encode()
        return _DEFAULT_TREE + [uid_as_bytes]

    department = record.primary_department
    display_name = record.last_name + b' ' + record.first_name + b' (' + login + b')'
    return [_TREE_ROOT, department, display_name]


def _make_acl(login, is_leaf):
    '''
    Returns the ACL (bytes) for a tree node.

    A leaf node of a user with a known login gets
    '_FULL_ACCESS_GROUP,@login'; every other node gets _FULL_ACCESS_GROUP
    only.
    '''
    if is_leaf and login is not None:
        return _FULL_ACCESS_GROUP + b',@' + login

    return _FULL_ACCESS_GROUP


def _add_staff_info(iso_date, log, staff_dump):
    '''
    Left-joins `log` with `staff_dump` by staff uid and expands every log
    record into one record per level of the user name tree.

    Tree paths below are written with '/' for readability; the stored
    `user_name_tree` value is produced by _tree_name_from_path(), i.e. it is
    tab-separated and wrapped into tabs.

    log:
    | iso_datetime | staff_uid | task_id | quantity |
    |--------------+-----------+---------+----------|
    | ...          | ...       | ...     | ...      |

    staff_dump:
    | uid | login | last_name | first_name | primary_department | ... |
    |-----+-------+-----------+------------+--------------------+-----|
    | ... | ...   | ...       | ...        | ...                | ... |

    result:
    | fielddate | user_name_tree                                      | task_id | quantity | _acl                      |
    |-----------+-----------------------------------------------------+---------+----------+---------------------------|
    | iso_date  | all                                                 | ...     | ...      | _FULL_ACCESS_GROUP        |
    | iso_date  | all/primary_department                              | ...     | ...      | _FULL_ACCESS_GROUP        |
    | iso_date  | all/primary_department/last_name first_name (login) | ...     | ...      | _FULL_ACCESS_GROUP,@login |
    | iso_date  | ...                                                 | ...     | ...      | ...                       |
    | iso_date  | all                                                 | ...     | ...      | _FULL_ACCESS_GROUP        |
    | iso_date  | all/unknown                                         | ...     | ...      | _FULL_ACCESS_GROUP        |
    | iso_date  | all/unknown/staff_uid                               | ...     | ...      | _FULL_ACCESS_GROUP        |
    '''

    def make_user_name_tree(records):
        for record in records:
            path = _make_user_name_path(record)
            login = record.get('login')
            leaf_depth = len(path)
            # Depth 1 is the root alone, `leaf_depth` is the full path.
            for depth in range(1, leaf_depth + 1):
                yield record.transform(
                    'last_name', 'first_name', 'primary_department', 'login', 'staff_uid',  # exclude
                    user_name_tree=_tree_name_from_path(path[:depth]),
                    _acl=_make_acl(login, depth == leaf_depth)
                )

    # staff_dump has no schema, so extract uid as uint to join it correctly with log.
    staff_by_uid = staff_dump.project(
        extractors.all(exclude=['uid']),
        staff_uid=extractors.custom(lambda uid: UInt(uid), 'uid')
    )

    log_with_staff = log.join(staff_by_uid, type='left', by='staff_uid')

    # Keep only the columns needed downstream and stamp every row with the date.
    dated_log = log_with_staff.project(
        'task_id', 'quantity', 'last_name', 'first_name', 'primary_department', 'login', 'staff_uid',
        fielddate=extractors.const(iso_date.encode())
    )

    return dated_log.map(make_user_name_tree)


def _add_tariffs(log_with_staff_info, task_tariff_map):
    '''
    Left-joins the log with `task_tariff_map` by `task_id` and computes the
    time spent and its cost for every record.

    Records left without `task_name_tree` after the join are expanded by
    _tree_names_for_unknown_values() using their `task_id`. When
    `seconds_per_task` is None, _DEFAULT_SECONDS_PER_TASK is used instead.

    log_with_staff_info:
    | fielddate | user_name_tree | task_id | quantity | _acl |
    |-----------+----------------+---------+----------+------|
    | ...       | ...            | ...     | ...      | ...  |

    task_tariff_map:
    | task_id | task_name_tree | seconds_per_task |
    |---------+----------------+------------------|
    |     ... |            ... |              ... |

    Result:
    | fielddate | user_name_tree | task_name_tree | quantity | time_spent_sec | cost_rub | _acl |
    |-----------+----------------+----------------+----------+----------------+----------+------|
    | ...       | ...            | ...            | ...      | ...            | ...      | ...  |
    '''

    def time_spent_sec_of(seconds, quantity):
        if seconds is None:
            seconds = _DEFAULT_SECONDS_PER_TASK
        return quantity * seconds

    def cost_rub_of(time_spent_sec):
        return time_spent_sec * TARIFF_RUB_PER_SEC

    fill_unknown_task_trees = _tree_names_for_unknown_values(
        tree_name_col='task_name_tree', value_col='task_id'
    )

    log_with_task_info = log_with_staff_info.join(task_tariff_map, type='left', by='task_id')
    log_with_task_trees = log_with_task_info.map(fill_unknown_task_trees)

    return log_with_task_trees.project(
        extractors.all(exclude=['seconds_per_task', 'task_id']),
        time_spent_sec=extractors.custom(time_spent_sec_of, 'seconds_per_task', 'quantity'),
        cost_rub=extractors.custom(cost_rub_of, 'time_spent_sec')
    )


def _aggregate_tasks(log_with_tariffs):
    '''
    Groups records by `fielddate`, `user_name_tree`, `task_name_tree` and
    `_acl` and sums the remaining columns within each group.

    log_with_tariffs:
    | fielddate | user_name_tree | task_name_tree | quantity | time_spent_sec | cost_rub | _acl |
    |-----------+----------------+----------------+----------+----------------+----------+------|
    | ...       | ...            | ...            | ...      | ...            | ...      | ...  |

    Result:
    | fielddate | user_name_tree | task_name_tree | quantity_total | time_spent_total_sec | cost_total_rub | _acl |
    |-----------+----------------+----------------+----------------+----------------------+----------------+------|
    | ...       | ...            | ...            | SUM(quantity)  | SUM(time_spent_sec)  | SUM(cost_rub)  | ...  |
    '''

    group_keys = ('fielddate', 'user_name_tree', 'task_name_tree', '_acl')
    grouped = log_with_tariffs.groupby(*group_keys)

    return grouped.aggregate(
        quantity_total=aggregators.sum('quantity'),
        time_spent_total_sec=aggregators.sum('time_spent_sec'),
        cost_total_rub=aggregators.sum('cost_rub')
    )


def make_report_job(job, iso_date, report):
    '''
    Adds the report computation for `iso_date` to `job` and returns `job`.

    The tracker log, the staff dump and the task tariff map are read from
    the tables named `iso_date` under the corresponding module-level paths.
    The log is enriched with staff info and tariffs, aggregated, and the
    result is published to `report`.
    '''
    def daily_table(directory, label):
        # Table for the requested date, labelled like the variable holding it.
        return job.table(ypath_join(directory, iso_date)).label(label)

    log = daily_table(_TRACKER_LOG_DIR, 'log')
    staff_dump = daily_table(_STAFF_DUMP_PATH, 'staff_dump')
    log_with_staff_info = _add_staff_info(iso_date, log, staff_dump).label('log_with_staff_info')

    task_tariff_map = daily_table(_TASK_TARIFF_MAP_DIR, 'task_tariff_map')
    log_with_tariffs = _add_tariffs(log_with_staff_info, task_tariff_map).label('log_with_tariffs')

    _aggregate_tasks(log_with_tariffs).label('result').publish(report)

    return job
