from nile.api.v1 import (
    filters,
    Record
)

from qb2.api.v1 import (
    extractors as qe,
    typing as qt,
)

from yt.wrapper.ypath import ypath_join

# Input: directory of nmaps users dump tables; the job reads the table
# whose name is the requested date.
_USERS_DUMP_DIR = '//home/logfeller/logs/nmaps-users-dump-log/1d'

# Input: crypta tables used to go from puid to yandexuid and from
# yandexuid to its log region ids.
_CRYPTA_PUID_YANDEXUID_SOUP = '//home/crypta/public/matching/by_id/puid/soup/yandexuid/passport-auth_fp'
_CRYPTA_YANDEXUID_WITH_ALL_INFO = '//home/crypta/public/ids_storage/yandexuid/yuid_with_all_info'

# Input: table that is joined by region_id to obtain major_id.
_MAJOR_REGIONS_MAP = '//home/maps/core/nmaps/analytics/geo-data/major_regions_map'

# Output: directory where the per-date result table is written.
_USERS_HOME_REGION_DIR = '//home/maps/core/nmaps/analytics/logs/users_home_regions'

# Schema passed to put() for the result table.
_RESULTS_SCHEMA = {
    'puid': qt.String,
    'major_ids': qt.List[qt.String],
}

# NOTE(review): not referenced anywhere in the visible part of this file.
_EARTH_ID = '10000'

# Passed as memory_limit to the join with the major regions map.
# NOTE(review): units are presumably megabytes - confirm against Nile docs.
_MEMORY_LIMIT = 8 * 1024


def _add_yandexuid(users_dump, crypta_puid_yandexuid_soup):
    '''
    Match puids of the nmaps users dump with yandexuids from the crypta soup

    Only the puid column of the dump is kept; the soup columns id2 and id1
    are exposed as puid and yandexuid respectively, and both sides are
    inner-joined by puid.

    users_dump:
    | puid | ... |
    +------+-----+
    |  ... | ... |

    crypta_puid_yandexuid_soup:
    | id1 | id2 |     id1Type | id2Type | ... |
    +-----+-----+-------------+---------+-----+
    | ... | ... | 'yandexuid' |  'puid' | ... |

    Result:
    | puid | yandexuid |
    +------+-----------+
    |  ... |       ... |
    '''
    soup_pairs = crypta_puid_yandexuid_soup.project(
        puid='id2',
        yandexuid='id1',
    )
    dump_puids = users_dump.project('puid')

    # assume_unique_left: the join is told that the left side (dump puids)
    # has unique keys.
    # NOTE(review): presumably each puid occurs once in the dump - confirm.
    return dump_puids.join(
        soup_pairs,
        by='puid',
        type='inner',
        assume_unique_left=True,
    )


def _add_regions_ids(nmaps_puids_with_yandexuids, crypta_yandexuid_with_all_info):
    '''
    Attach crypta log region ids to nmaps puids and drop the yandexuid column

    The inputs are inner-joined (yandexuid on the left, id on the right),
    then only puid is kept together with region_id, which is produced by
    qe.unfold from log_region_ids.

    nmaps_puids_with_yandexuids:
    | puid | yandexuid |
    +------+-----------+
    |  ... |       ... |

    crypta_yandexuid_with_all_info:
    | id  |     id_type | log_region_ids | ... |
    +-----+-------------+----------------+-----|
    | ... | 'yandexuid' |            ... | ... |

    result:
    | puid | region_id |
    +------+-----------+
    |  ... |       ... |
    '''
    matched = nmaps_puids_with_yandexuids.join(
        crypta_yandexuid_with_all_info,
        by_left='yandexuid',
        by_right='id',
        type='inner',
    )
    region_id_field = qe.unfold('region_id', 'log_region_ids')
    return matched.project('puid', region_id_field)


def _convert_and_fold_region_id_to_major_id(nmaps_puids_with_unfolded_region_ids, major_regions_map):
    '''
    Map every region of a puid to its major_id and fold them into one row

    The input is joined with the major regions map by region_id, grouped
    by puid, and each group is reduced to a single record holding the
    distinct major_id values (in no particular order).

    nmaps_puids_with_unfolded_region_ids:
    | puid | region_id |
    +------+-----------+
    |  ... |       ... |

    major_regions_map:
    | region_id | major_id | ... |
    +-----------+----------+-----+
    |       ... |      ... | ... |

    result:
    | puid | major_ids |
    +------+-----------+
    |  ... |       ... |
    '''
    def reducer(groups, **options):
        for key, rows in groups:
            # A set removes duplicates coming from different regions that
            # share the same major_id.
            distinct_major_ids = {row['major_id'] for row in rows}
            yield Record(key, major_ids=list(distinct_major_ids))

    # assume_small_right: the join is told that the regions map is the
    # small side; _MEMORY_LIMIT is passed as its memory limit.
    with_major_ids = nmaps_puids_with_unfolded_region_ids.join(
        major_regions_map,
        by='region_id',
        assume_small_right=True,
        memory_limit=_MEMORY_LIMIT,
    )
    return with_major_ids.groupby('puid').reduce(reducer)


def make_users_home_region_job(job, date):
    '''
    Describe the whole users home region computation on the given job

    Steps:
      1. read the users dump table for `date` and add yandexuids to puids;
      2. add log region ids from crypta;
      3. convert region ids to major ids and fold them per puid;
      4. put the result to the `date` table of the output directory.

    job: job object on which tables are declared
    date: name of the table inside the dump and the output directories

    Returns the same job object.
    '''
    dump_table = job.table(
        ypath_join(_USERS_DUMP_DIR, date)
    ).label('users_dump')

    soup_table = job.table(
        _CRYPTA_PUID_YANDEXUID_SOUP
    ).label('crypta_puid_yandexuid_soup')
    with_yandexuids = _add_yandexuid(
        dump_table,
        soup_table,
    ).label('nmaps_puids_with_yandexuids')

    all_info_table = job.table(
        _CRYPTA_YANDEXUID_WITH_ALL_INFO
    ).label('crypta_yandexuid_with_all_info')
    with_region_ids = _add_regions_ids(
        with_yandexuids,
        all_info_table,
    ).label('nmaps_puids_with_region_ids')

    regions_map_table = job.table(
        _MAJOR_REGIONS_MAP
    ).label('major_regions_map')
    home_regions = _convert_and_fold_region_id_to_major_id(
        with_region_ids,
        regions_map_table,
    ).label('users_home_region')

    output_path = ypath_join(_USERS_HOME_REGION_DIR, date)
    home_regions.put(output_path, schema=_RESULTS_SCHEMA)

    return job
