from maps.wikimap.stat.libs.common.lib import dates as d

from nile.api.v1 import (
    aggregators as na,
    extractors as ne,
    utils as nu,
    files as nfs,
    statface as ns,
    Record
)

from yt.wrapper import ypath_join
from collections import defaultdict


# Table locations below are relative paths; make_report joins them onto
# its `analytics_path` argument.

# Directory of per-date tables with users' home regions (the table name is
# the report's last_isodate).
USERS_HOME_REGIONS_TABLES = 'logs/users_home_regions'
# Daily edits/moderation log; the concrete tables are selected by the
# aggregation dates string.
EDITS_MODERATION_LOG = 'logs/nmaps-edits-moderation-users-log/1d'
# Region -> major region mapping (read both as a join input and as a
# side file of the closure mapper).
MAJOR_REGIONS_TABLE = 'geo-data/major_regions_map'
# NOTE(review): not referenced anywhere in the visible part of this file.
USER_REGIONS_TABLE = 'geo-data/puid_major_regions_list'
# Table joined to the log by `object_category`.
VALUABLE_CATEGORIES_TABLE = 'geo-data/valuable_categories'

# Statface report settings used when publishing the result.
REPORT_PATH = 'Maps.Wiki/KPI/ValuableLocalEdits'
# Fields passed to the report's replace mask.
REPORT_KEY_FIELDS = ['fielddate', 'agg_scale']
REPORT_SCALE = 'd'

# Only log records with one of these actions are counted.
ACTIONS = [
    'object-created',
    'object-modified',
    'object-deleted'
]

# When a record carries a moderation resolution, it must be one of these
# to be counted.
RESOLUTIONS = [
    'edit',
    'accept',
]

# Moderation statuses that are classified under the "free" locality branch.
FREE_MODERATION_STATUS = [
    'common',
    'moderator',
    'expert',
]

# Moderation status classified under the "robot" locality branch.
ROBOT_MODERATION_STATUS = 'robot'

# Values of the `locality` report field. Each value is a tab-delimited path
# that extends its parent's path (all -> free -> local, ...).
# NOTE(review): presumably this is the Statface tree-dimension format -
# confirm against the report config.
ALL_STATUS = '\tall\t'
PAID_STATUS = '\tall\tpaid\t'
ROBOT_STATUS = '\tall\trobot\t'
FREE_STATUS = '\tall\tfree\t'
FREE_LOCAL_STATUS = '\tall\tfree\tlocal\t'
FREE_ALIEN_STATUS = '\tall\tfree\talien\t'
FREE_GHOST_STATUS = '\tall\tfree\tghost\t'


def process_edits(records):
    """Filter edit records and fan each one out over its locality buckets.

    A record is skipped when its ``action`` is not listed in ACTIONS, or
    when it has a ``moderation_resolved_resolution`` field whose value is
    not listed in RESOLUTIONS.

    Every remaining record is emitted once per bucket it belongs to, as a
    Record built from the input with the ``locality`` field set:

    * always ALL_STATUS;
    * for a moderation status from FREE_MODERATION_STATUS: FREE_STATUS plus
      exactly one of FREE_GHOST_STATUS (record has no ``major_ids`` field),
      FREE_LOCAL_STATUS (``major_id`` is among ``major_ids``) or
      FREE_ALIEN_STATUS (otherwise);
    * for ROBOT_MODERATION_STATUS: ROBOT_STATUS;
    * for any other moderation status: PAID_STATUS.
    """
    for edit in records:
        if edit['action'] not in ACTIONS:
            continue

        resolved = 'moderation_resolved_resolution' in edit
        if resolved and edit['moderation_resolved_resolution'] not in RESOLUTIONS:
            continue

        if edit['moderation_status'] in FREE_MODERATION_STATUS:
            # Exactly one free sub-bucket applies to each free edit.
            if 'major_ids' not in edit:
                free_bucket = FREE_GHOST_STATUS
            elif edit['major_id'] in edit['major_ids']:
                free_bucket = FREE_LOCAL_STATUS
            else:
                free_bucket = FREE_ALIEN_STATUS
            buckets = [ALL_STATUS, FREE_STATUS, free_bucket]
        elif edit['moderation_status'] == ROBOT_MODERATION_STATUS:
            buckets = [ALL_STATUS, ROBOT_STATUS]
        else:
            buckets = [ALL_STATUS, PAID_STATUS]

        for bucket in buckets:
            yield Record(edit, locality=bucket)


def make_report(job, statface_client, last_isodate, agg_scale, analytics_path, results_path):
    """Build the valuable-local-edits report and publish it to Statface.

    The edits moderation log is joined with the users' home regions
    (expanded through the major regions map), the valuable categories table
    and the major regions map, classified by ``process_edits`` and then
    aggregated per (category_group, region_tree, population, locality).

    Args:
        job: job object providing ``table()``; the pipeline is attached to it.
        statface_client: client handed to the Statface report on publish.
        last_isodate: date string; selects the users' home regions table and
            is written to the ``fielddate`` column.
        agg_scale: aggregation scale; selects the log tables through
            ``d.aggregation_dates_str`` and is written to ``agg_scale``.
        analytics_path: root path all input table paths are joined onto.
        results_path: when not None, the result is additionally stored in
            ``<results_path>/<last_isodate>_<agg_scale>``.
    """
    dates_str = d.aggregation_dates_str(last_isodate, agg_scale)

    edits_moderation_log = job.table(
        ypath_join(analytics_path, EDITS_MODERATION_LOG, dates_str)
    ).label(
        'edits_moderation_log'
    )

    user_regions = job.table(
        ypath_join(analytics_path, USERS_HOME_REGIONS_TABLES, last_isodate)
    ).label('users_regions_data')

    major_regions = job.table(
        ypath_join(analytics_path, MAJOR_REGIONS_TABLE)
    ).label('major_regions')

    valuable_categories = job.table(
        ypath_join(analytics_path, VALUABLE_CATEGORIES_TABLE)
    ).label('valuable_categories')

    @nu.with_hints(files=[nfs.StreamFile(major_regions, 'major_regions')])
    def majors_closure_mapper(records, **options):
        """Replace each user's ``major_ids`` with the union of their closures.

        The major regions table is read from the 'major_regions' file stream.
        """
        def build_majors_closure_map(major_regions_map_records):
            # region_id -> set of every major_id listed for that region,
            # kept only for regions that have a row with a truthy
            # `least_major`.
            majors = set()
            closure = defaultdict(set)
            for record in major_regions_map_records:
                if record['least_major']:
                    majors.add(record['region_id'])
                closure[record['region_id']].add(record['major_id'])
            return {
                region_id: region_closure
                for region_id, region_closure in closure.items()
                if region_id in majors
            }

        majors_closure_map = build_majors_closure_map(options['file_streams']['major_regions'])

        def calc_closure(major_ids):
            closure = set()
            for major_id in major_ids:
                # Plain dict lookup: an id missing from the map raises
                # KeyError.
                closure |= majors_closure_map[major_id]
            return list(closure)

        for record in records:
            yield Record(puid=record['puid'], major_ids=calc_closure(record['major_ids']))

    def per_million(value, population):
        """Return ``value`` scaled per one million of ``population``.

        Returns None when the population is None or equals zero. The
        population is converted to float before the zero check, so that
        numeric 0 and strings such as '0.0' are handled like '0' instead
        of raising ZeroDivisionError.
        """
        if population is None:
            return None
        population = float(population)
        if population == 0:
            return None
        return int(round(value * 1000000.0 / population))

    log_with_locality = edits_moderation_log.join(
        # Left join: edits of users without home regions are kept and get
        # no `major_ids` field (process_edits classifies them as "ghost").
        user_regions.map(majors_closure_mapper),
        by='puid',
        type='left',
        assume_unique_right=True,
        assume_small=True,
        memory_limit=8*1024,
    ).join(
        valuable_categories,
        by='object_category',
        assume_unique_right=True,
        assume_small=True,
        memory_limit=8*1024,
    ).join(
        major_regions,
        by='region_id',
        assume_small=True,
        memory_limit=8*1024,
    ).map(
        process_edits,
        memory_limit=16*1024,
    ).groupby(
        'category_group',
        'region_tree',
        'population',
        'locality',
    )

    result = log_with_locality.aggregate(
        edits=na.count(),
        users=na.count_distinct_estimate('puid', precision=0.01),
    ).label(
        'intermediate_result'
    ).project(
        fielddate=ne.const(last_isodate),
        agg_scale=ne.const(agg_scale),
        category_group='category_group',
        region_tree='region_tree',
        locality='locality',
        edits='edits',
        edits_per_mln=ne.custom(per_million, 'edits', 'population'),
        users='users',
        users_per_mln=ne.custom(per_million, 'users', 'population'),
    ).label(
        'result'
    )

    if results_path is not None:
        result.put(ypath_join(results_path, last_isodate + '_' + agg_scale))

    result.publish(
        ns.StatfaceReport()
            .path(REPORT_PATH)
            .scale(REPORT_SCALE)
            .client(statface_client)
            .replace_mask(*REPORT_KEY_FIELDS),
        allow_change_job=True
    )
