#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
from functools import partial

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.interests_helpers import LabSegmentsInfo
from crypta.profile.utils.luigi_utils import BaseYtTask, YtDailyRewritableTarget, ExternalInput


def get_max_timestamp_reducer(key, rows, oldest_interest_timestamp):
    """Reduce rows sharing one key to a single row with their latest timestamp.

    :param key: mapping with the key columns of the group; it is copied, not mutated.
    :param rows: iterable of rows of the group, each having a ``timestamp`` field.
    :param oldest_interest_timestamp: groups whose latest timestamp is not
        strictly greater than this value produce no output.
    :return: generator yielding at most one row: the key columns plus ``max_timestamp``.
    """
    result = dict(key)
    latest_timestamp = max(row['timestamp'] for row in rows)

    # Drop the whole group when even its freshest record is too old.
    if not latest_timestamp > oldest_interest_timestamp:
        return

    result['max_timestamp'] = latest_timestamp
    yield result


def get_shortterm_interests_reducer(key, rows, shortterm_interest_ids):
    """Collapse all interest rows of one yandexuid into a single profile row.

    :param key: mapping with the ``yandexuid`` of the group.
    :param rows: iterable of rows with ``interest_lab_id`` and ``max_timestamp`` fields.
    :param shortterm_interest_ids: mapping from ``interest_lab_id`` to the
        short-term interest id; a lab id missing from it raises ``KeyError``.
    :return: generator yielding exactly one row with ``id``, ``id_type`` and
        ``shortterm_interests`` (stringified interest id -> timestamp).
    """
    user_id = str(key['yandexuid'])

    timestamp_by_interest = {}
    for row in rows:
        timestamp = row['max_timestamp']
        interest_id = str(shortterm_interest_ids[row['interest_lab_id']])
        # If several lab ids map to the same interest id, the last row wins.
        timestamp_by_interest[interest_id] = timestamp

    yield {
        'id': user_id,
        'id_type': 'yandexuid',
        'shortterm_interests': timestamp_by_interest,
    }


class ShorttermInterestsTask(BaseYtTask):
    """Build the ``shortterm_interests`` segment part table for ``self.date``.

    The task takes the tables found under the recent date folders of
    ``config.INTERESTS_PROCESSED_FOLDER``, keeps the latest timestamp for each
    (yandexuid, interest_lab_id) pair and writes one row per yandexuid holding
    a mapping from short-term interest id to that timestamp.
    """

    date = luigi.Parameter()
    task_group = 'shortterm_interests'

    def requires(self):
        """Depend on the ``processed_bb_dump`` node of the task date."""
        processed_dump_path = os.path.join(config.INTERESTS_PROCESSED_FOLDER, self.date, 'processed_bb_dump')
        return ExternalInput(processed_dump_path)

    def output(self):
        """Return the daily rewritable target of the ``shortterm_interests`` table."""
        target_path = os.path.join(config.PROFILES_SEGMENT_PARTS_YT_DIRECTORY, 'shortterm_interests')
        return YtDailyRewritableTarget(target_path, self.date)

    def run(self):
        """Recompute the output table inside a single YT transaction."""
        self.yt.config['spec_defaults']['pool'] = config.INTERESTS_POOL

        with self.yt.Transaction():
            with self.yt.TempTable() as latest_timestamps_table:
                self.yt.create_empty_table(
                    self.output().table,
                    schema={
                        'id': 'string',
                        'id_type': 'string',
                        'shortterm_interests': 'any',
                    },
                )

                # Take every table from the date folders that are not older than the expiration window.
                first_relevant_day = date_helpers.get_date_from_past(self.date, config.SHORTTERM_INTERESTS_EXPIRE_DAYS)
                source_tables = [
                    table
                    for day in self.yt.list(config.INTERESTS_PROCESSED_FOLDER)
                    if day >= first_relevant_day
                    for table in self.yt.search(os.path.join(config.INTERESTS_PROCESSED_FOLDER, day), node_type='table')
                ]

                # The cutoff does not have to be precise: old interests are also removed later in the process.
                # NOTE(review): the literal 5 differs from config.SHORTTERM_INTERESTS_EXPIRE_DAYS used above
                # for table selection - confirm this is intentional.
                cutoff_day = date_helpers.get_date_from_past(self.date, 5)
                oldest_interest_timestamp = date_helpers.from_utc_date_string_to_timestamp(cutoff_day)

                # Step 1: latest timestamp per (yandexuid, interest_lab_id) pair.
                latest_timestamp_reducer = partial(
                    get_max_timestamp_reducer,
                    oldest_interest_timestamp=oldest_interest_timestamp,
                )
                self.yt.run_reduce(
                    latest_timestamp_reducer,
                    source_tables,
                    latest_timestamps_table,
                    reduce_by=('yandexuid', 'interest_lab_id'),
                )

                # Step 2: gather all interests of a yandexuid into one output row.
                interest_id_by_lab_id = LabSegmentsInfo().lab_segment_id_to_shortterm_interest_id
                per_user_reducer = partial(
                    get_shortterm_interests_reducer,
                    shortterm_interest_ids=interest_id_by_lab_id,
                )
                self.yt.run_map_reduce(
                    None,
                    per_user_reducer,
                    latest_timestamps_table,
                    self.output().table,
                    reduce_by='yandexuid',
                )

                self.yt.run_sort(
                    self.output().table,
                    sort_by=['id', 'id_type'],
                )

                # Record which date the table was generated for.
                self.yt.set_attribute(
                    self.output().table,
                    'generate_date',
                    self.date,
                )
