from maps.wikimap.stat.libs.common.lib.geobase_region import (
    geobase_id_by_shape,
    EARTH_REGION_ID,
    FILES,
    GEOBASE_JOB_MEMORY_LIMIT
)
from nile.api.v1 import (
    aggregators as na,
    extractors as ne
)
from yt.wrapper.ypath import ypath_join

from . import edit_processors

# Fallback value for the `region_tree` column; set_region_tree substitutes it
# when the joined `region_tree` is empty or missing.
# NOTE(review): 10000 presumably corresponds to EARTH_REGION_ID - confirm.
DEFAULT_REGION_TREE = '\t10000\t'
# Base path of the nmaps edits log; a table name (or a name range) is joined
# onto it with ypath_join.
# NOTE(review): the '1d' suffix suggests one table per day - confirm.
DEFAULT_NMAP_EDITS_PATH = '//home/logfeller/logs/nmaps-edits-log/1d'
# Base path of the nmaps users dump log; a table name is joined onto it with
# ypath_join in make_nmaps_user_raw.
DEFAULT_NMAP_USER_PATH = '//home/logfeller/logs/nmaps-users-dump-log/1d'


def make_fielddate(datefield):
    '''
    Return the part of `datefield` that precedes the first space.

    The value is first passed through edit_processors.convert_to_utf and
    str(). If there is no space, the whole string is returned.
    NOTE(review): presumably the input looks like 'YYYY-MM-DD HH:MM:SS' and
    the result is its date part - confirm against the log format.
    '''
    text = str(edit_processors.convert_to_utf(datefield))
    date_part, _, _ = text.partition(' ')
    return date_part


def search_geoid_by_shape(geom):
    '''
    Resolve a geometry to a geobase region id.

    geom: geometry value of an edit; may be empty or None.

    Returns EARTH_REGION_ID unchanged when `geom` is falsy; otherwise the
    result of geobase_id_by_shape (called on the convert_to_utf'ed geometry)
    converted with str().
    NOTE(review): the two branches have the same type only if
    EARTH_REGION_ID is itself a string - confirm.
    '''
    if not geom:
        return EARTH_REGION_ID
    shape = edit_processors.convert_to_utf(geom)
    return str(geobase_id_by_shape(shape))


def make_edits_count_per_user(job, date, first_date='2014-10-08'):
    '''
    Count edits log rows per user over a range of edits log tables.

    job: job object used to open the input tables.
    date: name of the last table of the range; converted with str().
    first_date: name of the first table of the range. The default keeps the
        start that used to be hard-coded here, so existing callers are
        unaffected.

    The tables are addressed as '{first_date..date}' under
    DEFAULT_NMAP_EDITS_PATH and opened with ignore_missing=True.

    result:
    | edits | puid |
    |-------+------|
    |   ... |  ... |
    '''
    dates = '{' + str(first_date) + '..' + str(date) + '}'
    edits_log = job.table(
        ypath_join(DEFAULT_NMAP_EDITS_PATH, dates),
        ignore_missing=True
    )
    return edits_log.groupby('puid').aggregate(
        edits=na.count()
    ).label('edits_count_per_user')


def make_nmaps_edits_raw(job, date):
    '''
    Read the edits log table named `date` and keep the columns needed later.

    `fielddate` is computed from `iso_eventtime` with make_fielddate and
    `region_id` from `geom` with search_geoid_by_shape. FILES and
    GEOBASE_JOB_MEMORY_LIMIT are passed to the projection as `files` and
    `memory_limit`.

    result
    | action | commit_id | geom | object_category | object_id | puid | fielddate | region_id |
    |--------+-----------+------+-----------------+-----------+------+-----------+-----------|
    |    ... |       ... |  ... |             ... |       ... |  ... |       ... |       ... |
    '''
    edits_log = job.table(ypath_join(DEFAULT_NMAP_EDITS_PATH, date))
    projected = edits_log.project(
        'action',
        'commit_id',
        'geom',
        'object_category',
        'object_id',
        'puid',
        fielddate=ne.custom(make_fielddate, 'iso_eventtime'),
        region_id=ne.custom(search_geoid_by_shape, 'geom'),
        files=FILES,
        memory_limit=GEOBASE_JOB_MEMORY_LIMIT
    )
    return projected.label('nmaps_edits_raw')


def make_nmaps_user_raw(job, date):
    '''
    Read the users dump table named `date`, keeping two columns.

    result
    | puid | moderation_status |
    |------+-------------------|
    |  ... |               ... |
    '''
    users_dump = job.table(ypath_join(DEFAULT_NMAP_USER_PATH, date))
    projected = users_dump.project('puid', 'moderation_status')
    return projected.label('nmaps_user_raw')


def add_paths(job, edits_count_per_user, nmaps_edits_raw, nmaps_user_raw):
    '''
    Add trees for action with objects, user status and object groups.

    `job` is not used by this function; it is kept so the signature stays
    compatible with existing callers.

    Inputs (schemas as produced by make_edits_count_per_user,
    make_nmaps_edits_raw and make_nmaps_user_raw in this module):

    edits_count_per_user:
    | edits | puid |
    |-------+------|
    |    .. |  ... |

    nmaps_edits_raw:
    | action | commit_id | geom | object_category | object_id | puid | fielddate | region_id |
    |--------+-----------+------+-----------------+-----------+------+-----------+-----------|
    |    ... |       ... |  ... |             ... |       ... |  ... |       ... |       ... |

    nmaps_user_raw:
    | puid | moderation_status |
    |------+-------------------|
    |  ... |               ... |

    Returns the tuple (nmaps_edits_action, nmaps_edits_object,
    nmaps_users_status). The output columns below are determined by the
    mappers in edit_processors, which are not visible here.

    nmaps_edits_action:
    | commit_id | region_id | fielddate | puid | object_id | action_path |
    |-----------+-----------+-----------+------+-----------+-------------|
    |       ... |       ... |       ... |  ... |       ... |         ... |

    nmaps_edits_object:
    | commit_id | region_id | fielddate | puid | object_id | object_path |
    |-----------+-----------+-----------+------+-----------+-------------|
    |       ... |       ... |       ... |  ... |       ... |         ... |

    nmaps_users_status:
    | puid | status_path |
    |------+-------------|
    |  ... |         ... |
    '''
    nmaps_edits_action = nmaps_edits_raw.map(
        edit_processors.add_action_path_mapper
    ).label('create_action_path')

    nmaps_edits_object = nmaps_edits_raw.map(
        edit_processors.add_object_path_mapper
    ).label('create_object_path')

    # Inner join: only users present in both tables get a status path.
    users_with_edits = edits_count_per_user.join(
        nmaps_user_raw,
        by='puid',
        type='inner'
    )
    nmaps_users_status = users_with_edits.project(
        'puid',
        'moderation_status',
        # Replace an empty/missing counter with 0 before it reaches the mapper.
        edits=ne.custom(lambda edits: edits or 0, 'edits')
    ).label(
        'join_for_status_path'
    ).map(
        edit_processors.add_status_path_mapper
    ).label('create_status_path')

    return nmaps_edits_action, nmaps_edits_object, nmaps_users_status


def make_nmaps_edits_for_count(job, nmaps_edits_action, nmaps_edits_object, nmaps_users_status):
    '''
    Build one wide table with action, object and user status paths per edit.

    `job` is not used by this function.

    nmaps_edits_action:
    | commit_id | region_id | fielddate | puid | object_id | action_path |
    |-----------+-----------+-----------+------+-----------+-------------|
    |       ... |       ... |       ... |  ... |       ... |         ... |

    nmaps_edits_object:
    | commit_id | region_id | fielddate | puid | object_id | object_path |
    |-----------+-----------+-----------+------+-----------+-------------|
    |       ... |       ... |       ... |  ... |       ... |         ... |

    nmaps_users_status:
    | puid | status_path |
    |------+-------------|
    |  ... |         ... |

    Intermediate stream nmaps_edits (left join of actions with objects):
    | commit_id | puid | ... | action_path | object_path |
    |-----------+------+-----+-------------+-------------|
    |       ... |  ... | ... |         ... |         ... |

    Result (inner join of nmaps_edits with user statuses by puid):
    | commit_id | puid | ... | action_path | object_path | status_path |
    |-----------+------+-----+-------------+-------------+-------------|
    |       ... |  ... | ... |         ... |         ... |         ... |
    '''
    edit_keys = (
        'fielddate',
        'puid',
        'object_id',
        'commit_id',
        'region_id',
    )
    nmaps_edits = nmaps_edits_action.join(
        nmaps_edits_object,
        by=edit_keys,
        type='left'
    ).label('nmaps_edits')

    with_status = nmaps_edits.join(
        nmaps_users_status,
        by='puid',
        type='inner'
    )
    return with_status.label('make_nmaps_edits_for_count')


_RESULT_PATH_KEYS = (
    'fielddate',
    'region_tree',
    'action_path',
    'status_path',
    'object_path',
)


def _count_unique_per_paths(stream, id_field, label):
    '''
    Count distinct `id_field` values for every combination of path keys.

    Rows are first made unique by (_RESULT_PATH_KEYS + id_field), then
    grouped by _RESULT_PATH_KEYS and counted into 'count_uniq_<id_field>'.
    The resulting stream is labelled with `label`.
    '''
    return stream.unique(
        *(_RESULT_PATH_KEYS + (id_field,))
    ).groupby(
        *_RESULT_PATH_KEYS
    ).aggregate(
        **{'count_uniq_' + id_field: na.count()}
    ).label(label)


def prepare_result_counts(stream):
    '''
    Calculating the result of statistics.
    For every (fielddate, region_tree, action_path, status_path, object_path)
    combination the number of distinct commit_id, object_id and puid values
    is counted; the three counters are then inner-joined into one row.

    stream
    | fielddate | region_tree | action_path | status_path | object_path | object_id | puid | commit_id |
    |-----------+-------------+-------------+-------------+-------------+-----------+------+-----------|
    |       ... |         ... |         ... |         ... |         ... |       ... |  ... |       ... |

    per-counter intermediate streams (one for each of commit_id, object_id, puid)
    | fielddate | region_tree | action_path | status_path | object_path | count_uniq_<field> |
    |-----------+-------------+-------------+-------------+-------------+--------------------|
    |       ... |         ... |         ... |         ... |         ... |                ... |

    result (region_tree is emitted under the name geo_path)
    | fielddate | geo_path | action_path | status_path | object_path | count_uniq_commit_id | count_uniq_object_id | count_uniq_puid |
    |-----------+----------+-------------+-------------+-------------+----------------------+----------------------+-----------------|
    |       ... |      ... |         ... |         ... |         ... |                  ... |                  ... |             ... |
    '''
    # Label strings are runtime identifiers; their existing spelling is kept
    # on purpose so anything that refers to them keeps working.
    stream_count_uniq_commit_id = _count_unique_per_paths(
        stream, 'commit_id', 'aggegate_by_commit_id'
    )
    stream_count_uniq_object_id = _count_unique_per_paths(
        stream, 'object_id', 'aggegate_by_object_id'
    )
    stream_count_uniq_puid = _count_unique_per_paths(
        stream, 'puid', 'aggegate_by_puid'
    )

    return stream_count_uniq_commit_id.join(
        stream_count_uniq_object_id,
        by=_RESULT_PATH_KEYS,
        type='inner'
    ).join(
        stream_count_uniq_puid,
        by=_RESULT_PATH_KEYS,
        type='inner'
    ).project(
        'count_uniq_commit_id',
        'count_uniq_object_id',
        'count_uniq_puid',
        'fielddate',
        'action_path',
        'status_path',
        'object_path',
        geo_path='region_tree'
    ).label('prepare_result_counts')


def set_region_tree(stream, major_regions):
    '''
    Attach `region_tree` to every row of `stream`.

    `stream` is left-joined with `major_regions` by `region_id`, so rows
    without a match are kept. All columns are then passed through, and an
    empty or missing `region_tree` is replaced with DEFAULT_REGION_TREE.
    '''
    with_regions = stream.join(
        major_regions,
        by='region_id',
        type='left',
        assume_small_right=True,
        allow_undefined_keys=False,
        assume_defined=True,
        memory_limit=8 * 1024
    )
    return with_regions.project(
        ne.all(),
        region_tree=ne.custom(
            lambda tree: tree or DEFAULT_REGION_TREE,
            'region_tree'
        )
    )
