from maps.wikimap.stat.tasks_payment.tasks_logging.libs.schema import CARTOGRAPHIC_TASKS_LOG_SCHEMA
from maps.wikimap.stat.tasks_payment.tasks_logging.libs.geometry import append_bbox, mercator_ewkb_geometry_to_geo_bbox
from maps.wikimap.stat.tasks_payment.tasks_logging.libs.parsing import isotime_normalizer
from nile.api.v1 import extractors, filters
from cyson import UInt


# Prefixes of the `task_id` values produced for closed and resolved tasks; the
# object category id (or `common` when the category is missing) is appended to them.
_TASK_ID_PREFIX_CLOSED = b'moderation/closed/'
_TASK_ID_PREFIX_RESOLVED = b'moderation/resolved/'

# UID written as `closed_by` for tasks that the split step treats as autoclosed.
_ROBOT_UID = 82282794


def _filter_by_date(column_name, date):
    '''
    Builds a filter that keeps records whose `column_name` value is set and
    starts with `date`.
    '''

    def starts_with_date(value):
        # An unset value never matches the date.
        if value is None:
            return False
        return value.startswith(date)

    return filters.custom(starts_with_date, column_name)


def _extract_as_uint(column_name):
    '''
    Builds an extractor that wraps the value of `column_name` into `UInt`.
    '''

    def to_uint(value):
        return UInt(value)

    return extractors.custom(to_uint, column_name)


def _get_tasks(task_active_table, task_closed_table, date):
    '''
    Merges social.task_active and social.task_closed tables and leaves only
    the tasks that were resolved or closed at the provided `date`.

    task_active/closed_table:
    | event_id | closed_at | closed_by | resolved_at | resolved_by | primary_object_category_id | type | ... |
    +----------+-----------+-----------+-------------+-------------+----------------------------+------+-----+
    |      ... |       ... |       ... |         ... |         ... |                        ... |  ... | ... |

    Result:
    | event_id | closed_at | closed_by | resolved_at | resolved_by | primary_object_category_id |
    +----------+-----------+-----------+-------------+-------------+----------------------------+
    |      ... |       ... |       ... |         ... |         ... |                        ... |
    '''

    all_tasks = task_active_table.concat(task_closed_table)

    # A task is kept as soon as at least one of its times matches the date.
    processed_at_date = filters.or_(
        _filter_by_date('resolved_at', date),
        _filter_by_date('closed_at', date)
    )

    tasks_at_date = all_tasks.filter(processed_at_date)
    return tasks_at_date.project(
        'event_id',
        'closed_at',
        'closed_by',
        'resolved_at',
        'resolved_by',
        'primary_object_category_id'
    )


def _append_geometry_and_created_by(tasks, commit_event_table):
    '''
    Appends geometry column from social.commit_event to the table got from
    _get_tasks() by joining these tables by `event_id`.

    tasks:
    | event_id | closed_at | closed_by | resolved_at | resolved_by | primary_object_category_id |
    +----------+-----------+-----------+-------------+-------------+----------------------------+
    |      ... |       ... |       ... |         ... |         ... |                        ... |

    commit_event_table:
    | event_id | bounds_geom | created_by | ... |
    +----------+-------------+------------+-----+
    |      ... |         ... |        ... | ... |

    Result:
    | closed_at | closed_by | resolved_at | resolved_by | created_by | primary_object_category_id | bounds_geom |
    +-----------+-----------+-------------+-------------+------------+----------------------------+-------------+
    |       ... |       ... |         ... |         ... |        ... |                        ... |         ... |
    '''

    return tasks.join(
        commit_event_table, by='event_id', type='left', assume_unique=True
    ).project(
        'closed_at',
        'closed_by',
        'resolved_at',
        'resolved_by',
        'created_by',
        'primary_object_category_id',
        'bounds_geom',
    )


def _split_by_closed_at_and_resolved_at(tasks_with_bbox, date):
    '''
    Split tasks by means of the following rules:
    - tasks resolved at the provided date are pushed to `resolved_at_tasks` stream;
    - autoclosed tasks (closed_at == resolved_at) are pushed to
      `resolved_at_tasks` stream only; and
    - tasks closed at the provided date are pushed to `closed_at_tasks` stream.

    Note. A task resolved and closed at the same date will be pushed to both streams.

    tasks_with_bbox:
    | closed_at | closed_by | resolved_at | resolved_by | created_by | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-----------+-----------+-------------+-------------+------------+----------------------------+-------------+---------+---------+---------+---------+
    |       ... |       ... |         ... |         ... |        ... |                        ... |         ... |     ... |     ... |     ... |     ... |

    Result:

    closed_tasks:
    |   closed_at |   closed_by | created_by | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+------------+----------------------------+-------------+---------+---------+---------+---------+
    |    date-... |         ... |        ... |                        ... |         ... |     ... |     ... |     ... |     ... |

    resolved_tasks:
    | resolved_at | resolved_by | created_by | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+------------+----------------------------+-------------+---------+---------+---------+---------+
    |    date-... |         ... |        ... |                        ... |         ... |     ... |     ... |     ... |     ... |
    '''

    def split(records, closed_tasks, resolved_tasks):
        for record in records:
            # process autoclosed tasks mistakenly stored as tasks closed by human
            if record.resolved_at == record.closed_at and record.resolved_at.startswith(date):
                if record.resolved_by == record.closed_by:
                    # close by robot
                    resolved_tasks(record.transform('closed_at', 'closed_by'))
                    closed_tasks(record.transform('resolved_at', 'resolved_by', closed_by=_ROBOT_UID))
                    continue
                elif record.resolved_by == _ROBOT_UID:
                    # resolve by user, close by robot
                    resolved_tasks(record.transform('closed_at', 'closed_by', resolved_by=record.closed_by))
                    closed_tasks(record.transform('resolved_at', 'resolved_by', closed_by=_ROBOT_UID))
                    continue

            if record.resolved_at.startswith(date):
                resolved_tasks(record.transform('closed_at', 'closed_by'))

            if record.closed_at is not None and record.closed_at.startswith(date):
                closed_tasks(record.transform('resolved_at', 'resolved_by'))

    return tasks_with_bbox.map(split)


def _remove_self_processed_tasks(tasks, puid_column):
    '''
    Drops tasks whose author is the very user that closed/resolved them and
    removes the `created_by` column afterwards.

    tasks:
    | time_column | puid_column | created_by | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+------------+----------------------------+-------------+---------+---------+---------+---------+
    |         ... |         ... |        ... |                        ... |         ... |     ... |     ... |     ... |     ... |

    Result:
    | time_column | puid_column | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+----------------------------+-------------+---------+---------+---------+---------+
    |         ... |         ... |                        ... |         ... |     ... |     ... |     ... |     ... |
    '''

    def processed_by_another_user(puid, created_by):
        return created_by != puid

    tasks_processed_by_others = tasks.filter(
        filters.custom(processed_by_another_user, puid_column, 'created_by')
    )

    # Every column but `created_by` is passed through.
    return tasks_processed_by_others.project(extractors.all(['created_by']))


def _remove_autoprocessed_dependent_tasks(tasks, time_column, puid_column):
    '''
    Leaves only one entry for a group of tasks simultaneously resolved or closed.

    tasks:
    | time_column | puid_column | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+----------------------------+-------------+---------+---------+---------+---------+
    |         ... |         ... |                        ... |         ... |     ... |     ... |     ... |     ... |


    Result:
    | time_column | puid_column | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+----------------------------+-------------+---------+---------+---------+---------+
    |         ... |         ... |                        ... |         ... |     ... |     ... |     ... |     ... |
    '''

    return tasks.groupby(time_column, puid_column, 'primary_object_category_id').take(1)


def _task_id_extractor(task_id_prefix):
    '''
    Builds an extractor that makes a task id out of `task_id_prefix` and the
    value of `primary_object_category_id` (`common` if the category is unset).
    '''

    def make_task_id(category_id):
        if category_id is None:
            return task_id_prefix + b'common'
        return task_id_prefix + category_id

    return extractors.custom(make_task_id, 'primary_object_category_id')


def _convert_to_result_format(tasks, time_column, puid_column, task_id_prefix):
    '''
    Projects tasks to the columns of the resulting log: the time is passed
    through the ISO time normalizer, the user id is converted to an unsigned
    integer, the task id is built from the prefix and the category, and each
    entry gets the quantity of 1.0.

    tasks:
    | time_column | puid_column | primary_object_category_id | bounds_geom | lat_min | lon_min | lat_max | lon_max |
    +-------------+-------------+----------------------------+-------------+---------+---------+---------+---------+
    |         ... |         ... |                        ... |         ... |     ... |     ... |     ... |     ... |

    Result:
    | iso_datetime | puid |                    task_id | quantity | geom | lat_min | lon_min | lat_max | lon_max |
    +--------------+------+----------------------------+----------+------+---------+---------+---------+---------+
    |          ... |  ... | task_id_prefix/category_id |      ... |  ... |     ... |     ... |     ... |     ... |

    '''
    iso_datetime = extractors.custom(isotime_normalizer(), time_column)
    puid = _extract_as_uint(puid_column)
    task_id = _task_id_extractor(task_id_prefix)
    quantity = extractors.const(1.0)

    return tasks.project(
        iso_datetime=iso_datetime,
        puid=puid,
        task_id=task_id,
        quantity=quantity,
        # The remaining columns are copied, `bounds_geom` being renamed to `geom`.
        geom='bounds_geom',
        lat_min='lat_min',
        lon_min='lon_min',
        lat_max='lat_max',
        lon_max='lon_max'
    )


def make_moderation_log_job(job, date, result_path, task_active_path, task_closed_path, commit_event_path):
    '''
    Adds to `job` the operations that build the log of tasks closed and
    resolved at `date` and put it to `result_path`:
    - tasks of the date are read from the task_active and task_closed tables;
    - geometry, author and bounding box are appended to them;
    - the tasks are split into closed and resolved ones; and
    - each part is cleaned of self-processed and simultaneously processed
      tasks, converted to the result format and concatenated into the result.

    Returns the same `job`.
    '''
    # Dates are compared with table values as UTF-8 encoded bytes.
    date = date.encode('utf-8')

    active = job.table(task_active_path).label('social.task_active')
    closed = job.table(task_closed_path).label('social.task_closed')
    tasks = _get_tasks(active, closed, date).label('tasks')

    commit_events = job.table(commit_event_path).label('social.commit_event')
    tasks_with_commit_data = _append_geometry_and_created_by(tasks, commit_events)
    tasks_with_commit_data = tasks_with_commit_data.label('tasks with geometry and created_by')

    tasks_with_bbox = append_bbox(tasks_with_commit_data, 'bounds_geom', mercator_ewkb_geometry_to_geo_bbox)
    tasks_with_bbox = tasks_with_bbox.label('tasks with bbox')

    closed_tasks, resolved_tasks = _split_by_closed_at_and_resolved_at(tasks_with_bbox, date)
    closed_tasks.label('closed tasks')
    resolved_tasks.label('resolved tasks')

    # Closed tasks pipeline.
    closed_by_others = _remove_self_processed_tasks(
        closed_tasks, 'closed_by'
    ).label('closed tasks without self created')
    closed_deduplicated = _remove_autoprocessed_dependent_tasks(
        closed_by_others, 'closed_at', 'closed_by'
    ).label('closed tasks without autoclosed')
    closed_log = _convert_to_result_format(
        closed_deduplicated, 'closed_at', 'closed_by', _TASK_ID_PREFIX_CLOSED
    ).label('result closed tasks')

    # Resolved tasks pipeline.
    resolved_by_others = _remove_self_processed_tasks(
        resolved_tasks, 'resolved_by'
    ).label('resolved tasks without self created')
    resolved_deduplicated = _remove_autoprocessed_dependent_tasks(
        resolved_by_others, 'resolved_at', 'resolved_by'
    ).label('resolved tasks without autoresolved')
    resolved_log = _convert_to_result_format(
        resolved_deduplicated, 'resolved_at', 'resolved_by', _TASK_ID_PREFIX_RESOLVED
    ).label('result resolved tasks')

    full_log = job.concat(closed_log, resolved_log).label('result')
    full_log.put(result_path, schema=CARTOGRAPHIC_TASKS_LOG_SCHEMA, ensure_optional=False)

    return job
