from maps.wikimap.stat.libs.common.lib.geobase_region import (
    geobase_id_by_shape,
    EARTH_REGION_ID,
    FILES,
    GEOBASE_JOB_MEMORY_LIMIT
)
from nile.api.v1 import (
    aggregators as na,
    extractors as ne,
    filters as nf
)
from yt.wrapper.ypath import ypath_join

from . import edit_processors
from . import edits_constants

# Fallback `region_tree` value that set_region_tree() assigns when the joined
# value is empty.
# NOTE(review): looks like a tab-delimited region path holding only region
# 10000 — confirm against the major-regions table format.
DEFAULT_REGION_TREE = '\t10000\t'
# YT directories with the source logs; the functions below append a date to
# them with ypath_join to address a single table.
# NOTE(review): DEFAULT_NMAP_EDITS_PATH is not referenced in the code visible
# here — presumably used by another module; verify before removing.
DEFAULT_NMAP_EDITS_PATH = '//home/logfeller/logs/nmaps-edits-log/1d'
DEFAULT_NMAP_USER_PATH = '//home/logfeller/logs/nmaps-users-dump-log/1d'
DEFAULT_NMAP_MODERATION_PATH = '//home/logfeller/logs/nmaps-moderation-log/1d'


def make_fielddate(datefield):
    '''
    Build the `fielddate` value from an event timestamp.

    The timestamp goes through edit_processors.convert_to_utf, is turned into
    a str, and everything starting from the first space is dropped.
    '''
    timestamp = str(edit_processors.convert_to_utf(datefield))
    date_part, _, _ = timestamp.partition(' ')
    return date_part


def search_geoid_by_shape(geom):
    '''
    Resolve the geobase region id for a geometry.

    Returns EARTH_REGION_ID when `geom` is empty or missing; otherwise the
    result of geobase_id_by_shape converted to str.
    '''
    if not geom:
        # NOTE(review): returned as-is, while the lookup branch below always
        # returns str — confirm EARTH_REGION_ID has the matching type.
        return EARTH_REGION_ID

    shape = edit_processors.convert_to_utf(geom)
    return str(geobase_id_by_shape(shape))


def make_moderation_tasks(job, date):
    '''
    Read one table of nmaps-moderation-log and keep the fields needed for
    the statistics.

    Records whose task_action equals b'created' are dropped. region_id is
    computed from `geom` and fielddate from `iso_eventtime`.

    result:
    | puid | task_id | object_category | event_type | region_id | fielddate |
    |------+---------+-----------------+------------+-----------+-----------|
    |  ... |     ... |             ... |        ... |       ... |       ... |
    '''
    log_table = job.table(ypath_join(DEFAULT_NMAP_MODERATION_PATH, date))

    # Everything except the b'created' task_action passes the filter.
    is_not_created = nf.not_(nf.equals('task_action', b'created'))
    handled_tasks = log_table.filter(is_not_created)

    projected = handled_tasks.project(
        'puid',
        'task_id',
        'object_category',
        'event_type',
        region_id=ne.custom(search_geoid_by_shape, 'geom'),
        fielddate=ne.custom(make_fielddate, 'iso_eventtime'),
        # Extra files and memory limit requested for the region lookup.
        files=FILES,
        memory_limit=GEOBASE_JOB_MEMORY_LIMIT
    )
    return projected.label('moderation_tasks')


def make_nmaps_user_raw(job, date):
    '''
    Read one table of nmaps-users-dump-log and keep only the users whose
    moderation_status is listed in edits_constants.USER_STATUS_LIST.

    result
    | puid | moderation_status |
    |------+-------------------|
    |  ... |               ... |
    '''
    users_dump = job.table(ypath_join(DEFAULT_NMAP_USER_PATH, date))
    puid_with_status = users_dump.project('puid', 'moderation_status')

    has_listed_status = nf.custom(
        lambda status: status in edits_constants.USER_STATUS_LIST,
        'moderation_status'
    )
    return puid_with_status.filter(has_listed_status).label('nmaps_user_raw')


def add_paths(job, moderation_tasks_raw, nmaps_user_raw):
    '''
    Run the object-path and event-path mappers over the moderation tasks and
    attach the user status of the task's puid.

    The join with nmaps_user_raw is an inner one, so tasks whose puid is
    absent from nmaps_user_raw are dropped. `job` is not used here.

    moderation_tasks_raw:
    | puid | task_id | object_category | event_type | region_id | fielddate |
    |------+---------+-----------------+------------+-----------+-----------|
    |  ... |     ... |             ... |        ... |       ... |       ... |

    nmaps_user_raw:
    | puid | moderation_status |
    |------+-------------------|
    |  ... |               ... |

    result (user_status is the renamed moderation_status):
    | task_id | region_id | fielddate | puid | event_path | object_path |      user_status |
    |---------+-----------+-----------+------+------------+-------------+------------------|
    |     ... |       ... |       ... |  ... |        ... |         ... |        moderator |
    |     ... |       ... |       ... |  ... |        ... |         ... | yandex-moderator |
    |     ... |       ... |       ... |  ... |        ... |         ... |     cartographer |
    '''
    with_object_path = moderation_tasks_raw.map(
        edit_processors.add_object_path_mapper
    ).label('create_object_path')

    with_both_paths = with_object_path.map(
        edit_processors.add_event_path_mapper
    ).label('create_event_path')

    tasks_with_users = with_both_paths.join(
        nmaps_user_raw,
        by='puid',
        type='inner'
    )

    return tasks_with_users.project(
        'puid',
        'task_id',
        'region_id',
        'fielddate',
        'event_path',
        'object_path',
        user_status='moderation_status'
    ).label('moderation_with_paths')


# Dimensions every counter is grouped by and the partial results are joined on.
_RESULT_GROUP_KEYS = ('fielddate', 'region_tree', 'event_path', 'object_path')


def _count_unique_tasks(stream, count_field, label, user_status=None):
    '''
    Count distinct task_id values per _RESULT_GROUP_KEYS combination.

    stream: records carrying _RESULT_GROUP_KEYS, task_id and user_status.
    count_field: name of the output column holding the count.
    label: label assigned to the resulting stream.
    user_status: when given, only records with this user_status are counted.
    '''
    if user_status is not None:
        stream = stream.filter(nf.equals('user_status', user_status))

    return stream.unique(
        *(_RESULT_GROUP_KEYS + ('task_id',))
    ).groupby(
        *_RESULT_GROUP_KEYS
    ).aggregate(
        **{count_field: na.count()}
    ).label(label)


def _zero_if_missing(value):
    '''Turn a falsy counter (e.g. one left empty by a left join) into 0.'''
    return value or 0


def prepare_result_counts(stream):
    '''
    Calculating the result of statistics.

    Tasks are deduplicated by task_id inside every
    (fielddate, region_tree, event_path, object_path) group and then counted:
    once over all records and once per user_status. Note that the per-status
    columns count distinct tasks of users with that status, not distinct users.

    stream
    | fielddate | region_tree | event_path | object_path | task_id | puid | user_status |
    |-----------+-------------+------------+-------------+---------+------+-------------|
    |       ... |         ... |        ... |         ... |     ... |  ... |         ... |

    intermediate counters (one stream per <counter>: sum_mod_task,
    sum_moderator, sum_yandex_moderator, sum_cartographer)
    | fielddate | region_tree | event_path | object_path | <counter> |
    |-----------+-------------+------------+-------------+-----------|
    |       ... |         ... |        ... |         ... |       ... |

    result (region_tree is emitted under the name geo_path)
    | fielddate | geo_path | event_path | object_path | sum_mod_task | sum_moderator | sum_yandex_moderator | sum_cartographer |
    |-----------+----------+------------+-------------+--------------+---------------+----------------------+------------------|
    |       ... |      ... |        ... |         ... |          ... |           ... |                  ... |              ... |
    '''
    all_tasks = _count_unique_tasks(
        stream, 'sum_mod_task', 'aggregate_by_task_id'
    )
    per_status_counts = (
        _count_unique_tasks(
            stream, 'sum_moderator', 'aggregate_by_moderator',
            user_status=b'moderator'
        ),
        _count_unique_tasks(
            stream, 'sum_yandex_moderator', 'aggregate_by_yandex_moderator',
            user_status=b'yandex-moderator'
        ),
        _count_unique_tasks(
            stream, 'sum_cartographer', 'aggregate_by_cartographer',
            user_status=b'cartographer'
        ),
    )

    # The overall counter is the left side, so a group stays in the result
    # even when some status has no tasks in it.
    joined = all_tasks
    for status_counts in per_status_counts:
        joined = joined.join(
            status_counts,
            by=_RESULT_GROUP_KEYS,
            type='left'
        )

    return joined.project(
        'fielddate',
        'event_path',
        'object_path',
        # Counters absent after the left joins are reported as 0.
        sum_mod_task=ne.custom(_zero_if_missing, 'sum_mod_task'),
        sum_moderator=ne.custom(_zero_if_missing, 'sum_moderator'),
        sum_yandex_moderator=ne.custom(
            _zero_if_missing, 'sum_yandex_moderator'
        ),
        sum_cartographer=ne.custom(_zero_if_missing, 'sum_cartographer'),
        geo_path='region_tree'
    ).label('prepare_result_counts')


def set_region_tree(stream, major_regions):
    '''
    Attach `region_tree` to every record of `stream`.

    `major_regions` is left-joined by region_id, with the right side hinted
    as small. Records whose region_tree comes back empty get
    DEFAULT_REGION_TREE instead; all other fields are passed through.
    '''
    with_regions = stream.join(
        major_regions,
        by='region_id',
        type='left',
        assume_small_right=True,
        allow_undefined_keys=False,
        assume_defined=True,
        memory_limit=8 * 1024
    )

    region_tree_or_default = ne.custom(
        lambda tree: tree or DEFAULT_REGION_TREE,
        'region_tree'
    )
    return with_regions.project(
        ne.all(),
        region_tree=region_tree_or_default
    )
