#!/usr/bin/env python
import datetime as dt
import logging

import click
import yt.wrapper as yt
from nile.api.v1 import extractors as ne
from qb2.api.v1 import filters as qf, resources as qr, extractors as qe

from advisor_money.common.cli import DateType
from advisor_money.common.postback_parser import (PostbackParser, postback_is_testing,
                                                  parse_impression_id, get_impression_id_from_hitlog)
from advisor_money.settings import (YT_CONFIG, ADVISOR_ACCESS_LOG_1_DAY, ADVISOR_PROMO_EVENTS_LOG_1_DAY,
                                    ADVISOR_POSTBACKS_PATH, DIRECT_POSTCLICKS_LOG, BS_HIT_LOG,
                                    YANDEX_DIRECT_CONVERSIONS_PATH)
from advisor_money.utils.date_utils import get_yesterday, date_range
from advisor_money.utils.yt_utils import make_yt_path, get_cluster

# Look-back window in days for promo events: tables from (day - PROMO_EVENTS_DAYS) to day are read.
PROMO_EVENTS_DAYS = 30
# Look-back window in days for bs-hit-log: tables from (day - HITLOG_DAYS) to day are read.
HITLOG_DAYS = 5
# Page id used to filter both the bs-hit-log ('pageid') and the Direct postclicks log ('PageID').
# NOTE(review): presumably the page id of the launcher, judging by the name -- confirm.
LAUNCHER_PAGE_ID = '262475'

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def concat_yandex_direct_conversions(stream, job, day):
    """Append Yandex.Direct conversions for ``day`` to ``stream``.

    Direct postclicks of ``day`` are left-joined by ``hitlog_id`` with the bs-hit-log
    records of the ``HITLOG_DAYS`` preceding days (both filtered by ``LAUNCHER_PAGE_ID``)
    and reshaped to the same set of fields the parsed postbacks have. The intermediate
    table, which still contains ``hitlog_id``, is stored at
    ``YANDEX_DIRECT_CONVERSIONS_PATH`` for ``day``; rows without an impression id are
    dropped only after that table has been written.

    :param stream: stream the conversions are concatenated to.
    :param job: job used to open the source tables.
    :param day: processed date; must provide ``isoformat()`` and support subtracting
        a ``datetime.timedelta``.
    :return: ``stream`` concatenated with the conversions stream.
    """
    day_str = day.isoformat()
    first_hitlog_day = day - dt.timedelta(days=HITLOG_DAYS)
    logger.info('Parse Yandex.Direct conversions for %s', day_str)

    def clid_of(impression_id):
        return parse_impression_id(impression_id)['clid']

    def experiment_of(impression_id):
        return parse_impression_id(impression_id)['experiment']

    def to_int_timestamp(raw):
        # An empty value stays None: the join below is a left one, so a postclick
        # may have no matching hit log record.
        if not raw:
            return None
        return int(float(raw))

    # Hit log records of the look-back window, reduced to the fields used below.
    hitlog_tables = BS_HIT_LOG.format('{%s..%s}' % (first_hitlog_day, day))
    launcher_hits = job.table(hitlog_tables)
    launcher_hits = launcher_hits.filter(qf.equals('pageid', LAUNCHER_PAGE_ID))
    launcher_hits = launcher_hits.project(
        'queryargs',
        timestamp='unixtime',
        hitlog_id='hitlogid',
    )

    # Postclicks of the processed day, enriched with the matching hit log record.
    launcher_postclicks = job.table(DIRECT_POSTCLICKS_LOG.format(day_str))
    launcher_postclicks = launcher_postclicks.filter(qf.equals('PageID', LAUNCHER_PAGE_ID))
    launcher_postclicks = launcher_postclicks.project(hitlog_id='HitLogID')
    joined = launcher_postclicks.join(launcher_hits, by='hitlog_id', type='left')

    # Bring the records to the postback layout; fields Direct cannot provide are None.
    conversions = joined.project(
        'hitlog_id',
        adnetwork=ne.const('yandexdirect'),
        impression_id=ne.custom(get_impression_id_from_hitlog, 'queryargs'),
        request=ne.const(None),
        clid=ne.custom(clid_of, 'impression_id'),
        experiment=ne.custom(experiment_of, 'impression_id'),
        payout_ip=ne.const(None),
        postback_currency=ne.const(None),
        postback_payout_unconverted=ne.const(None),
        timestamp=ne.custom(to_int_timestamp, 'timestamp'),
    )

    conversions_path = make_yt_path(YANDEX_DIRECT_CONVERSIONS_PATH.format(day_str))
    stored_conversions = conversions.put(conversions_path)

    # remove hitlog_id because postbacks don't have this field
    # hitlog_id is used only in debug purposes
    without_hitlog_id = stored_conversions.project(qe.all(exclude=('hitlog_id',)))
    with_impression_id = without_hitlog_id.filter(qf.nonzero('impression_id'))

    return stream.concat(with_impression_id)


def run_postbacks_stats_yt(day):
    """Build the postbacks table for ``day`` on YT.

    The job:

    1. takes the access-log requests of ``day`` that start with ``/api/v1/postback/``
       and maps them through ``PostbackParser``;
    2. drops rows whose impression id is empty or belongs to a testing device;
    3. appends Yandex.Direct conversions (see ``concat_yandex_direct_conversions``);
    4. left-joins the result by ``impression_id`` with the ``on_click`` promo events
       of the last ``PROMO_EVENTS_DAYS`` days;
    5. groups by ``impression_id``, applies ``top(1, by='click_timestamp')`` and writes
       the result to ``ADVISOR_POSTBACKS_PATH`` for ``day``.

    After the job has run, the Yandex.Direct conversions table is read back and one
    error is logged if any of its records has an empty ``impression_id``.

    :param day: processed date; must provide ``isoformat()`` and support subtracting
        a ``datetime.timedelta``.
    """
    date_isoformat = day.isoformat()
    logger.info('Parse postbacks for %s', date_isoformat)

    postback_parser = PostbackParser()
    promo_events_date_range = (day - dt.timedelta(days=PROMO_EVENTS_DAYS), day)

    cluster = get_cluster()
    job = cluster.env().job()

    parsed_postbacks = job.table(
        ADVISOR_ACCESS_LOG_1_DAY.format(date_isoformat)
    ).filter(
        # TODO: Enable this filtration when https://st.yandex-team.ru/ADVISOR-1707 will be fixed
        # qf.custom(lambda ip, geobase: geobase.region_by_ip(ip).is_yandex, 'ip', qr.resource('Geobase')),
        qf.custom(lambda r: r.startswith('/api/v1/postback/'), 'request')
    ).map(
        postback_parser,
    ).filter(
        # Do not save postbacks from testing devices https://st.yandex-team.ru/ADVISOR-1302
        qf.not_(qf.custom(postback_is_testing, 'impression_id')),
        qf.nonzero('impression_id'),
    ).project(
        'adnetwork',
        'impression_id',
        'request',
        clid=ne.custom(lambda x: parse_impression_id(x)['clid'], 'impression_id'),
        experiment=ne.custom(lambda x: parse_impression_id(x)['experiment'], 'impression_id'),
        payout_ip='ip',
        postback_currency='currency',
        postback_payout_unconverted=ne.custom(float, 'payout'),
        timestamp=ne.custom(lambda x: int(float(x)), 'unixtime_msec'),
    ).call(
        concat_yandex_direct_conversions, job, day
    )

    # Click events of the look-back window; has_promo_event marks rows that found a match in the join below.
    on_click_promo_events = job.table(
        ADVISOR_PROMO_EVENTS_LOG_1_DAY.format("{%s..%s}" % promo_events_date_range),
        ignore_missing=True
    ).filter(
        qf.equals('event', 'on_click'),
        qf.nonzero('offer_id'),
    ).project(
        'ad_id',
        'android_id',
        'country',
        'device_id',
        'impression_id',
        'offer_id',
        'package_name',
        'place',
        'uuid',
        'view_type',
        click_ip='ip',
        click_timestamp=ne.custom(int, 'unixtime'),
        event_currency='currency',
        event_payout_unconverted=ne.custom(float, 'payout_unconverted'),
        event_payout=ne.custom(float, 'payout'),
        has_promo_event=ne.const(True),
    )

    parsed_postbacks.join(
        on_click_promo_events, by='impression_id', type='left'
    ).project(
        # Get payouts and currencies from promo events if postback doesn't have them.
        qe.all(exclude=('postback_payout_unconverted', 'event_payout_unconverted', 'postback_currency',
                        'event_currency', 'postback_payout', 'event_payout')),
        qe.or_('payout_unconverted', 'postback_payout_unconverted', 'event_payout_unconverted'),
        qe.or_('currency', 'postback_currency', 'event_currency'),
        qe.or_('payout', 'postback_payout', 'event_payout'),
    ).project(
        # Installs from testing server are not being logged in promo events, so we assume that these installs are from
        # testing and should not be taken into account in statistics: set payout to zero and country to empty string
        ne.all(('has_promo_event', 'country', 'payout', 'click_timestamp')),
        country=ne.custom(lambda has_event, country: country if has_event else '', 'has_promo_event', 'country'),
        payout=ne.custom(lambda has_event, payout: payout if has_event else 0.0, 'has_promo_event', 'payout'),
        click_timestamp=ne.custom(lambda x: x or 0, 'click_timestamp'),
    ).filter(
        # Keep rows without a matched click and rows whose offer id starts with the ad network name.
        qf.custom(lambda offer, adnetwork: offer is None or offer.startswith(adnetwork), 'offer_id', 'adnetwork'),
    ).groupby(
        'impression_id'
    ).top(
        1, by='click_timestamp'
    ).put(
        make_yt_path(ADVISOR_POSTBACKS_PATH.format(date_isoformat)),
        schema=dict(
            ad_id=str,
            adnetwork=str,
            android_id=str,
            click_ip=str,
            click_timestamp=int,
            clid=str,
            country=str,
            currency=str,
            device_id=str,
            experiment=str,
            impression_id=str,
            offer_id=str,
            package_name=str,
            payout=float,
            payout_ip=str,
            payout_unconverted=float,
            place=str,
            request=str,
            timestamp=int,
            uuid=str,
            view_type=str,
        )
    )
    job.run()

    yandex_direct_conversions_table = make_yt_path(YANDEX_DIRECT_CONVERSIONS_PATH.format(date_isoformat))
    # The message describes the table as a whole, so report it once instead of once per
    # offending record; any() also stops reading at the first such record.
    has_empty_impression_id = any(
        not record.impression_id for record in cluster.read(yandex_direct_conversions_table)
    )
    if has_empty_impression_id:
        logger.error("Impression_id of some records in table %s is empty", yandex_direct_conversions_table)


# NOTE(review): this is a module-level statement, so it is logged whenever the module is
# imported or executed, not when some cleanup step runs. No cleanup code is visible in
# this file -- looks like a leftover; confirm before removing.
logger.info('Cleaning up postbacks stats')


# Command-line entry point: applies the YT config and processes every day that
# date_range yields for the span from (date - lookback) to date.
# Kept as comments on purpose: a docstring would be shown by click in --help.
@click.command()
@click.option('--date', '-d', default=get_yesterday, type=DateType(), help='Date in format yyyy-mm-dd')
@click.option('--lookback', '-l', type=int, default=0, help='Number of days ago to run command on')
def cli(date, lookback):
    yt.update_config(YT_CONFIG)

    first_day = date - dt.timedelta(days=lookback)
    # TODO: optimize (count many days in one command)
    for day in date_range(first_day, date):
        run_postbacks_stats_yt(day)


# Run the click command only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    cli()
