import datetime
import logging

import numpy as np
from flask import current_app as app
from nile.api.v1 import filters
from qb2.api.v1 import filters as qf

from jafar.commands.stats.update_usage_stats import get_offline_metrika_tables
from jafar.datasets.base import BaseDatasetProcessor, get_table_name
from jafar.datasets.filters import filter_invalid_items
from jafar.utils.io import get_cluster
from jafar_yt import location_stats, usage_stats
from jafar_yt.utils.helpers import is_valid_uuid

# Size of the look-back window (in days) of daily usage logs fed into the job.
LOCALITY_USAGE_STATS_COLLECT_DURATION = 30  # in days

logger = logging.getLogger(__name__)

# Field layout of one locality record: (field name, element type).
# The builtin ``object`` is used instead of ``np.object``: the latter was only
# an alias for it, deprecated in NumPy 1.20 and removed in NumPy 1.24.
LOCALITY_DTYPE = [
    ('item', object),
    ('region', np.int32),
    ('score', np.float32),
]


class LocalityDatasetProcessor(BaseDatasetProcessor):
    """DatasetProcessor for app locality data.

    Reads the daily mobile-metrika tables for the last
    ``LOCALITY_USAGE_STATS_COLLECT_DURATION`` days and writes, for every
    requested country, a table with the ``item``, ``region`` and ``score``
    columns.
    """
    source = 'locality'

    # Callables applied (via ``stream.call``) to each per-country stream before
    # it is stored. This class attribute does not shadow the module-level
    # ``filters`` import inside methods: class scope is not visible there.
    filters = (filter_invalid_items,)

    interactions_dtype = LOCALITY_DTYPE

    def __init__(self):
        """Compute the source table set and the output file name."""
        super(LocalityDatasetProcessor, self).__init__()

        end_date = datetime.date.today()
        start_date = end_date - datetime.timedelta(days=LOCALITY_USAGE_STATS_COLLECT_DURATION)
        dates = get_offline_metrika_tables(start_date, end_date)

        # Doubled braces are literal ones, so the result has the form
        # "<root>/{<date1>,<date2>,...}" - one path covering every daily table.
        self.yt_table_source = "{root}/{{{dates}}}".format(
            root=app.config['YT_MOBMETRIKA_PATH_1_DAY'],
            dates=','.join(dates)
        )
        self.df_filename = '{}.pack'.format(self.source)

    @property
    def interaction_schema(self):
        """Return the column-name -> Python-type schema of the result tables."""
        return dict(item=str, region=int, score=float)

    def update_many(self, countries):
        """Build and run a single job that stores one result table per country.

        :param countries: iterable of country values; each one is compared
            with the ``country`` field of the records and passed to
            ``get_table_name`` to build the destination path.
        """
        cluster = get_cluster()
        job = cluster.job()
        stream = job.table(
            self.yt_table_source, ignore_missing=True
        ).filter(
            # Keep only launcher events from valid devices that carry a value.
            filters.equals('APIKey', app.config['YT_LAUNCHER_API_KEY']),
            filters.custom(is_valid_uuid, 'DeviceID'),
            qf.one_of('EventName', ['app_launch', 'App_install']),
            qf.defined('EventValue'),
        ).map(
            usage_stats.usage_stats_mapper,
        ).groupby(
            *usage_stats.REDUCE_FIELDS
        ).reduce(
            usage_stats.usage_stats_reducer
        ).map(
            location_stats.LocationStatsMapper(),
            memory_limit=5 * 1024,
            intensity='cpu'
        ).groupby(
            'item'
        ).reduce(
            location_stats.location_reducer,
            memory_limit=8 * 1024,
            intensity='cpu'
        ).map(
            location_stats.location_stats_filter
        ).map(
            location_stats.country_mapper
        )

        # Use the schema declared by this class. The original code read
        # ``self.interactions_schema``, a name this class does not define.
        schema = self.interaction_schema

        for country in countries:
            # The first element returned by split() is carried over to the next
            # iteration, the second one is processed for the current country.
            # NOTE(review): this relies on split() returning
            # (non-matching, matching) streams in that order - confirm.
            stream, country_stream = stream.split(
                filters.equals('country', country)
            )
            for _filter in self.filters:
                country_stream = country_stream.call(_filter, job=job, country=country)

            path = get_table_name(self.yt_table_result, country)
            country_stream.project('item', 'region', 'score').put(path, schema=schema)
        # All per-country outputs are attached to the same job, so run it once.
        job.run()
