import logging
from datetime import date, timedelta

import yt.wrapper as yt
from flask import current_app as app
from nile.api.v1 import filters as nf, aggregators as na, files, extractors as ne, get_records_from_file
from qb2.api.v1 import filters as qf

from jafar.utils.io import get_apps_table
from jafar_yt import update_datasets

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def append_value(stream, **kwargs):
    """
    Projects every existing field of the stream and adds a constant
    `value` field equal to 1.0. Extra keyword arguments are ignored.
    """
    keep_all_fields = ne.all()
    unit_value = ne.const(1.0)
    return stream.project(keep_all_fields, value=unit_value)


def fix_invalid_timestamps(stream, **kwargs):
    """
    Applies `update_datasets.fix_invalid_timestamps` to the stream as a mapper.
    Extra keyword arguments are ignored.
    """
    timestamp_mapper = update_datasets.fix_invalid_timestamps
    return stream.map(timestamp_mapper)


def filter_item_min_install_count(stream, min_percentile=90, min_organic_rate=0.01, **kwargs):
    """
    Keeps user-installed records of popular, organically installed items.

    Per-item statistics are aggregated over the input stream:
     * `installs` - `na.count()` per item
     * `organic_installs` - `na.sum('is_user_app')` per item

    An item is kept when both conditions hold:
     * `installs` is strictly greater than the `min_percentile` percentile
       of `installs` over all items
     * `organic_installs / installs` is strictly greater than `min_organic_rate`

    The input stream is then joined with the kept items, only records with
    `is_user_app > 0` are left, and `is_user_app` is overwritten with True.

    :param stream: stream of records having `item` and `is_user_app` fields
    :param min_percentile: percentile of per-item install counts used as threshold
    :param min_organic_rate: exclusive lower bound for the organic installs share
    :param kwargs: ignored
    """
    # prepare install stats for apps to filter by it
    item_stats = stream.project(
        'item',
        'is_user_app'
    ).groupby(
        'item'
    ).aggregate(
        installs=na.count(),
        organic_installs=na.sum('is_user_app')
    )

    # table with only one element - threshold; attached to the mapper as a file
    threshold_file = files.StreamFile(
        item_stats.aggregate(
            threshold=na.percentile('installs', min_percentile)
        ),
        'threshold'
    )

    def items_filter(records):
        # built-in next() works on both Python 2.6+ and Python 3,
        # unlike the Python-2-only `.next()` iterator method
        threshold = next(get_records_from_file('threshold')).threshold
        for record in records:
            # the rate is computed only for items above the threshold
            if record.installs > threshold:
                organic_rate = float(record.organic_installs) / record.installs
                if organic_rate > min_organic_rate:
                    yield record

    # filter unpopular and mostly preinstalled apps
    popular_items = item_stats.map(
        items_filter,
        files=[threshold_file]
    ).project(
        'item'
    )

    return stream.join(
        popular_items,
        by='item',
        assume_unique_right=True,
        assume_small_right=True,
        allow_undefined_keys=False,
        assume_defined=True
    ).filter(
        nf.custom(lambda x: x > 0, 'is_user_app')
    ).project(
        ne.all(),
        is_user_app=ne.const(True)
    )


def filter_duplicates(stream, fields=('user', 'item'), **kwargs):
    """
    Calls `stream.unique` with the given `fields` as positional arguments
    (by default `user` and `item`). Extra keyword arguments are ignored.
    """
    key_fields = tuple(fields)
    return stream.unique(*key_fields)


def filter_invalid_items(stream, job, country, **kwargs):
    """
    Leaves only records whose `item` is present among valid apps.

    Valid apps are rows of the apps table (`get_apps_table()`) which:
     * have `region` equal to the Google Play region configured for `country`
     * have a defined and non-zero `subcategory_eng`
     * are not alpha versions (`bool(is_alpha)` is False)
     * are not listed in HARD_BANNED_APPS (Yandex Launcher and friends)

    The result is an inner join of `stream` with those apps by `item`.
    """
    target_region = app.config['GOOGLE_PLAY_REGIONS_MAP'][country]
    banned_items = app.config['HARD_BANNED_APPS']

    # apps table reduced to the fields needed for validation; `id` becomes `item`
    apps = job.table(get_apps_table()).project(
        'region',
        'subcategory_eng',
        item='id',
        is_alpha=ne.custom(bool, 'is_alpha'),
    )

    validity_conditions = (
        nf.equals('region', target_region),
        qf.defined('subcategory_eng'),
        qf.nonzero('subcategory_eng'),
        nf.equals('is_alpha', False),
        qf.not_(qf.one_of('item', banned_items)),
    )
    valid_items = apps.filter(*validity_conditions).project('item')

    return stream.join(valid_items, by='item', type='inner')


def filter_new_users(stream, profile_age=200, **kwargs):
    """
    Keeps records whose `profile_age` field does not exceed the given
    `profile_age` limit (200 by default). Extra keyword arguments are ignored.
    """
    def is_within_age_limit(age):
        return age <= profile_age

    age_condition = nf.custom(is_within_age_limit, 'profile_age')
    return stream.filter(age_condition)
