from maps.wikimap.stat.tasks_payment.tasks_logging.libs.geometry import append_bbox, geo_wkb_geometry_to_geo_bbox
from nile.api.v1 import extractors, filters, Record
from cyson import UInt
import datetime
import pytz

# Prefix of the task id column values that reference an MRC pedestrian region;
# the part after the prefix is parsed as the integer region id
# (see _get_mrc_region_id).
_MRC_ACCOUNT_PREFIX = b'mrc_pedestrian:'

# Region log statuses that open and close a billing interval
# (see _mrc_regions_to_bill).
_REGION_STATUS_START = b'can_start'
_REGION_STATUS_FINISH = b'awaiting_check'

# A walk object gets the "invalid" task id only when its feedback resolution
# is "rejected" AND the reject reason is one of the listed values
# (see _add_task_id); everything else gets the "valid" task id.
_RESOLUTION_REJECTED = b'rejected'
_REJECT_REASONS_INVALID = [b'incorrect-data', b'no-info', b'no-process', b'prohibited-by-rules', b'spam']

# Values written to the `task_id` column of the resulting log.
_TASK_ID_VALID = b'feedback/created/mrc-pedestrian-region/valid'
_TASK_ID_INVALID = b'feedback/created/mrc-pedestrian-region/invalid'


def _extract_as_uint(column_name):
    '''
    Builds an extractor that converts the value of `column_name` to cyson.UInt.
    '''
    def to_uint(value):
        return UInt(value)

    return extractors.custom(to_uint, column_name)


def _extract_as_int(column_name):
    '''
    Builds an extractor that converts the value of `column_name` with int().
    '''
    def to_int(value):
        return int(value)

    return extractors.custom(to_int, column_name)


def _filter_by_date(column_name, date):
    '''
    Builds a filter that keeps only the records whose `column_name` value
    is not None and starts with `date` (a str, encoded to bytes here).
    '''
    date_prefix = date.encode()

    def starts_with_date(value):
        if value is None:
            return False
        return value.startswith(date_prefix)

    return filters.custom(starts_with_date, column_name)


def _convert_microseconds_to_isodate(mrc_pedestrian_region_log):
    '''
    Replaces the `timestamp` column with `isodate`: the timestamp is divided
    by 1000000, interpreted as a Unix time in the Europe/Moscow timezone and
    rendered as an ISO 8601 string encoded to bytes. All other columns are
    passed through unchanged.

    mrc_pedestrian_region_log:
    | timestamp | ... |
    |-----------+-----|
    | ...       | ... |

    Result:
    | isodate | ... |
    |---------+-----|
    | ...     | ... |
    '''
    moscow_tz = pytz.timezone('Europe/Moscow')

    def to_isodate(timestamp):
        seconds = timestamp / 1000000
        moment = datetime.datetime.fromtimestamp(seconds, moscow_tz)
        return moment.isoformat().encode()

    untouched_columns = extractors.all(exclude=['timestamp'])
    return mrc_pedestrian_region_log.project(
        untouched_columns,
        isodate=extractors.custom(to_isodate, 'timestamp'),
    )


def _mrc_regions_to_bill(date, mrc_pedestrian_region_log):
    '''
    Finds the regions whose work interval was finished on `date`.

    Log records are grouped by (object_id, name) and ordered by isodate.
    Within a group the first `can_start` record seen since the previous
    `awaiting_check` opens an interval, and the next `awaiting_check` closes
    it. A closed interval is emitted only when its finish isodate starts
    with `date`.

    mrc_pedestrian_region_log:
    | object_id | name | status | isodate | ... |
    |-----------+------+--------+---------+-----|
    | ...       | ...  | ...    | ...     | ... |

    Result:
    | mrc_region_id | mrc_region_name | started_at | finished_at |
    |---------------+-----------------+------------+-------------|
    | ...           | ...             | ...        | ...         |
    '''
    date_prefix = date.encode()

    def start_finish_reducer(groups):
        for _key, records in groups:
            interval_start = None
            for record in records:
                status = record.status

                if status == _REGION_STATUS_START:
                    # Only the earliest start of an interval is remembered.
                    if interval_start is None:
                        interval_start = record.isodate
                    continue

                if status != _REGION_STATUS_FINISH:
                    continue

                if interval_start is not None:
                    interval_finish = record.isodate
                    if interval_finish is not None and interval_finish.startswith(date_prefix):
                        yield Record(
                            mrc_region_id=record.object_id,
                            mrc_region_name=f'{record.name.decode()} ({record.object_id})'.encode(),
                            started_at=interval_start,
                            finished_at=interval_finish,
                        )
                # Any finish event closes the interval, whether or not it was billed.
                interval_start = None

    grouped = mrc_pedestrian_region_log.groupby('object_id', 'name')
    ordered = grouped.sort('isodate')
    return ordered.reduce(start_finish_reducer)


def _get_mrc_region_id(task_id):
    '''
    Parses the integer region id out of a b"mrc_pedestrian:<ID>" value.

    Returns None when `task_id` is None or does not start with the prefix.
    '''
    if task_id is None or not task_id.startswith(_MRC_ACCOUNT_PREFIX):
        return None

    region_id_part = task_id[len(_MRC_ACCOUNT_PREFIX):]
    return int(region_id_part)


def _extract_mrc_region_id(task_id, ugc_account_task_id):
    '''
    Builds an extractor of the MRC region id from two columns.

    The value of the `task_id` column is used whenever it is not None;
    the `ugc_account_task_id` column is consulted only when it is None.
    '''
    def pick_region_id(primary_value, fallback_value):
        chosen = fallback_value if primary_value is None else primary_value
        return _get_mrc_region_id(chosen)

    return extractors.custom(pick_region_id, task_id, ugc_account_task_id)


def _get_walk_objects(
    mrc_regions_to_bill,
    signals_walk_object_replica
):
    '''
    Selects the walk objects that belong to a billed region and were created
    within that region's [started_at, finished_at] interval (both ends
    inclusive).

    mrc_regions_to_bill:
    | mrc_region_id | mrc_region_name | started_at | finished_at |
    |---------------+-----------------+------------+-------------|
    | ...           | ...             | ...        | ...         |

    signals_walk_object_replica:
    | feedback_task_id | user_id | task_id or ugc_account_task_id | created_at | geometry | ... |
    |------------------+---------+--------------------------------+------------+----------+-----|
    | ...              | ...     | "mrc_pedestrian:MRC_REGION_ID" | ...        | ...      | ... |

    Result:
    | feedback_task_id | user_id | finished_at | geom | mrc_region |
    |------------------+---------+-------------+------+------------|
    | ...              | ...     | ...         | ...  | ...        |
    '''
    def created_within_interval(created, started, finished):
        return started <= created and created <= finished

    walk_objects = signals_walk_object_replica.project(
        'feedback_task_id', 'user_id', 'created_at',
        geom='geometry',
        mrc_region_id=_extract_mrc_region_id('task_id', 'ugc_account_task_id')
    )

    with_regions = walk_objects.join(
        mrc_regions_to_bill,
        by='mrc_region_id',
        assume_small_right=True
    )

    within_interval = with_regions.filter(
        filters.custom(created_within_interval, 'created_at', 'started_at', 'finished_at')
    )

    return within_interval.project(
        'feedback_task_id', 'user_id', 'finished_at', 'geom',
        mrc_region='mrc_region_name'
    )


def _add_walk_object_resolution(
    pedestrian_objects,
    feedback_dump
):
    '''
    Attaches the feedback resolution to every walk object by joining on the
    feedback task id (the `id` column of `feedback_dump` converted with
    int()). The join key is dropped from the result.

    pedestrian_objects:
    | feedback_task_id | user_id | finished_at | geom | mrc_region |
    |------------------+---------+-------------+------+------------|
    | ...              | ...     | ...         | ...  | ...        |

    feedback_dump:
    | id  | resolution | reject_reason | ... |
    |-----+------------+---------------+-----|
    | ... | ...        | ...           | ... |

    Result:
    | user_id | finished_at | geom | mrc_region | resolution | reject_reason |
    |---------+-------------+------+------------+------------+---------------|
    | ...     | ...         | ...  | ...        | ...        | ...           |
    '''
    resolutions = feedback_dump.project(
        'resolution', 'reject_reason',
        feedback_task_id=_extract_as_int('id')
    )

    with_resolution = pedestrian_objects.join(resolutions, by='feedback_task_id')

    everything_but_key = extractors.all(exclude=['feedback_task_id'])
    return with_resolution.project(everything_but_key)


def _add_task_id(pedestrian_objects):
    '''
    Replaces `resolution` and `reject_reason` with a `task_id` column.

    The "invalid" task id is assigned only to objects that were rejected with
    one of the reasons from _REJECT_REASONS_INVALID; every other object gets
    the "valid" task id.

    pedestrian_objects:
    | resolution | reject_reason | ... |
    |------------+---------------+-----|
    | ...        | ...           | ... |

    Result:
    | task_id | ... |
    |---------+-----|
    | ...     | ... |
    '''
    def get_task_id(resolution, reject_reason):
        rejected_as_invalid = (
            resolution == _RESOLUTION_REJECTED
            and reject_reason in _REJECT_REASONS_INVALID
        )
        return _TASK_ID_INVALID if rejected_as_invalid else _TASK_ID_VALID

    remaining_columns = extractors.all(exclude=['resolution', 'reject_reason'])
    return pedestrian_objects.project(
        remaining_columns,
        task_id=extractors.custom(get_task_id, 'resolution', 'reject_reason'),
    )


def _convert_to_result_format(pedestrian_objects):
    '''
    Shapes the table into the final log format: `finished_at` becomes
    `iso_datetime`, `user_id` becomes `puid` (converted to UInt) and a
    constant `quantity` of 1.0 is added.

    pedestrian_objects:
    | user_id | finished_at | task_id | mrc_region | geom | lat_min | lon_min | lat_max | lon_max |
    |---------+-------------+---------+------------+------+---------+---------+---------+---------|
    | ...     | ...         | ...     | ...        | ...  | ...     | ...     | ...     | ...     |

    Result:
    | iso_datetime | puid (uint) | task_id | mrc_region | quantity | geom | lat_min | lon_min | lat_max | lon_max |
    |--------------+-------------+---------+------------+----------+------+---------+---------+---------+---------|
    | finished_at  | user_id     | ...     | ...        |        1 | ...  | ...     | ...     | ...     | ...     |
    '''
    passthrough_columns = (
        'task_id', 'mrc_region', 'geom',
        'lat_min', 'lon_min', 'lat_max', 'lon_max',
    )
    return pedestrian_objects.project(
        *passthrough_columns,
        iso_datetime='finished_at',
        puid=_extract_as_uint('user_id'),
        quantity=extractors.const(1.0)
    )


def make_log(
    date,
    signals_walk_object_replica,
    feedback_dump,
    mrc_pedestrian_region_log,
):
    '''
    Builds the payment log of walk objects for the MRC pedestrian regions
    whose work interval was finished on `date` (a str that the finish
    isodate must start with).

    signals_walk_object_replica:
    | id  | feedback_task_id | user_id | ugc_account_task_id            | created_at | ... |
    |-----+------------------+---------+--------------------------------+------------+-----|
    | ... | ...              | ...     | "mrc_pedestrian:MRC_REGION_ID" | ...        | ... |

    feedback_dump:
    | id  | resolution | reject_reason | ... |
    |-----+------------+---------------+-----|
    | ... | ...        | ...           | ... |

    mrc_pedestrian_region_log:
    | object_id | name | company_id | status | timestamp |
    |-----------+------+------------+--------+-----------|
    | ...       | ...  | ...        | ...    | ...       |

    Result:
    | iso_datetime | puid | task_id | mrc_region | quantity | geom | lat_min | lon_min | lat_max | lon_max |
    |--------------+------+---------+------------+----------+------+---------+---------+---------+---------|
    | ...          | ...  | ...     | ...        |        1 | ...  | ...     | ...     | ...     | ...     |
    '''
    # Regions to bill: intervals from the region log that were closed on `date`.
    region_log_with_isodate = _convert_microseconds_to_isodate(mrc_pedestrian_region_log)
    billable_regions = _mrc_regions_to_bill(date, region_log_with_isodate)

    # Walk objects of those regions, enriched step by step.
    walk_objects = _get_walk_objects(billable_regions, signals_walk_object_replica)
    resolved_objects = _add_walk_object_resolution(walk_objects, feedback_dump)
    objects_with_bbox = append_bbox(resolved_objects, 'geom', geo_wkb_geometry_to_geo_bbox)
    objects_with_task_id = _add_task_id(objects_with_bbox)

    return _convert_to_result_format(objects_with_task_id)
