#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils import utils
from crypta.profile.utils.config import config
from crypta.profile.utils.loggers import TimeTracker
from crypta.profile.utils.luigi_utils import YtDailyRewritableTarget, OldNodesByNameCleaner, BaseYtTask

from crypta.profile.runners.export_profiles.lib.profiles_generation.get_basic_profiles import GetBasicProfiles
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_devid_profiles import GetBasicDevidProfiles


def merge_reducer(key, rows):
    """Reduce one key group to its most recently updated row.

    Args:
        key: the reduce key of the group (unused; the emitted row is one of
            the input rows and is yielded unchanged).
        rows: iterable of records of the group; every record must provide an
            'update_time' item with mutually comparable values.

    Yields:
        The single record with the greatest 'update_time'. On ties the first
        such record in iteration order wins (behaviour of ``max``).

    Raises:
        ValueError: if ``rows`` is empty.
    """
    # max() consumes the iterable in a single pass, so the group is streamed
    # instead of being copied into a list first.
    yield max(rows, key=lambda record: record['update_time'])


class MergeBasicProfiles(BaseYtTask):
    """Merge recent daily basic-profile tables into one table per data source.

    For the configured ``data_source`` ('yandexuid' or 'devid') the task
    collects the existing daily tables of the last N dates, keeps for every
    key the row with the greatest 'update_time' (see ``merge_reducer``),
    sorts the result by the key columns and stamps it with ``generate_date``.

    Parameters:
        date: date the merge is run for.
        data_source: 'yandexuid' or 'devid'; any other value raises KeyError
            in the per-source lookups below.
    """

    date = luigi.Parameter()
    data_source = luigi.Parameter()
    priority = 100
    task_group = 'export_profiles'

    # Number of daily devid tables kept by the cleaner AND merged by run().
    # Both places must agree, so the value is defined exactly once.
    _DEVID_DAYS_TO_KEEP = 35

    def requires(self):
        """Return the upstream tasks for the selected data source.

        Returns:
            dict with 'daily_profiles' (the task producing the daily table
            for ``self.date``) and 'cleaner' (an ``OldNodesByNameCleaner``
            for the folder holding the raw daily tables).

        Raises:
            KeyError: if ``self.data_source`` is not a known source.
        """
        requires_by_data_source = {
            'yandexuid': {
                'daily_profiles': GetBasicProfiles(self.date),
                'cleaner': OldNodesByNameCleaner(
                    date=self.date,
                    folder=config.RAW_YANDEXUID_PROFILES_YT_DIRECTORY,
                    lifetime=config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
                ),
            },
            'devid': {
                'daily_profiles': GetBasicDevidProfiles(self.date),
                'cleaner': OldNodesByNameCleaner(
                    date=self.date,
                    folder=config.RAW_DEVID_PROFILES_YT_DIRECTORY,
                    lifetime=self._DEVID_DAYS_TO_KEEP,
                ),
            },
        }

        return requires_by_data_source[self.data_source]

    def output(self):
        """Return the merged-table target for the selected data source.

        Raises:
            KeyError: if ``self.data_source`` is not a known source.
        """
        outputs_by_data_source = {
            'yandexuid': YtDailyRewritableTarget(config.MERGED_RAW_YANDEXUID_PROFILES, self.date),
            'devid': YtDailyRewritableTarget(config.MERGED_RAW_DEVID_PROFILES, self.date),
        }
        return outputs_by_data_source[self.data_source]

    def run(self):
        """Build the merged table inside a single YT transaction.

        Steps: pick the existing daily tables for the back dates, create the
        empty output table with the per-source schema, reduce the inputs by
        the key columns with ``merge_reducer``, sort the output by the same
        columns and set its 'generate_date' attribute to ``self.date``.
        """
        with TimeTracker(self.__class__.__name__), self.yt.Transaction():
            number_of_days_by_data_source = {
                'yandexuid': config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
                'devid': self._DEVID_DAYS_TO_KEEP,
            }
            dates = date_helpers.generate_back_dates(self.date, number_of_days_by_data_source[self.data_source])

            # Daily tables live next to the table produced by 'daily_profiles'
            # and are named by date; dates without a table are skipped.
            input_directory = os.path.dirname(self.input()['daily_profiles'].table)
            candidate_tables = (os.path.join(input_directory, date) for date in dates)
            input_tables = [table for table in candidate_tables if self.yt.exists(table)]

            schema_by_data_source = {
                'yandexuid': utils.yandexuid_classification_schema,
                'devid': utils.devid_classification_schema,
            }
            key_columns_by_data_source = {
                'yandexuid': ('yandexuid', ),
                'devid': ('id', 'id_type'),
            }
            key_columns = key_columns_by_data_source[self.data_source]

            # Resolve the destination path once; every step below writes to it.
            output_table = self.output().table

            self.yt.create_empty_table(
                output_table,
                schema=schema_by_data_source[self.data_source],
            )

            # No mapper is passed (None): rows go straight to the reducer.
            self.yt.run_map_reduce(
                None,
                merge_reducer,
                input_tables,
                output_table,
                reduce_by=key_columns,
            )

            self.yt.run_sort(
                output_table,
                sort_by=key_columns,
            )

            self.yt.set_attribute(
                output_table,
                'generate_date',
                self.date,
            )
