#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import json

import luigi
from yt.wrapper import with_context

from crypta.profile.utils import utils
from crypta.profile.utils.config import config
from crypta.profile.utils.luigi_utils import YtTarget, BaseYtTask, OldNodesByNameCleaner
from crypta.profile.utils.loggers import TimeTracker
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_socdem_by_crypta_id import GetSocdemByCryptaId
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_merged_segments import CombineSegmentsByYandexuid
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_additional_yandexuid_attributes import GetAdditionalYandexuidAttributes


@with_context
def join_by_yandexuid(key, rows, context):
    """Reducer: merge all rows sharing one reduce key into a single profile row.

    The source table of each row is identified through ``context.table_index``:

    * ``0`` -- the base (socdem) row; it is required,
    * ``1`` -- additional attributes,
    * any other index -- segments.

    When several rows of the same table arrive, the last one wins.  Attribute
    fields are applied on top of the base row first and segment fields second,
    so on a column clash segments take precedence.

    Nothing is emitted for the key if a row of a non-zero table is seen before
    any base row, or if no base row is seen at all.

    :param key: reduce key (not used by the reducer itself).
    :param rows: iterable of row dicts belonging to the key.
    :param context: object exposing ``table_index`` of the current row.
    """
    profile = None
    attributes = {}
    segments = {}

    for record in rows:
        if context.table_index == 0:
            profile = record
            continue

        # A side-table row without a preceding base row: drop the whole key.
        if not profile:
            return

        if context.table_index == 1:
            attributes = record
        else:
            segments = record

    if not profile:
        return

    # Order matters: segments are applied last and override attributes.
    for extra in (attributes, segments):
        profile.update(extra)
    yield profile


class AddSegmentsToProfiles(BaseYtTask):
    """Join socdem profiles, extra yandexuid attributes and segments by yandexuid.

    The joined table is written to
    ``config.JOINED_YANDEXUID_PROFILES_YT_DIRECTORY/<date>`` and sorted by
    ``yandexuid``.
    """

    date = luigi.Parameter()
    priority = 100
    task_group = 'export_profiles'

    def requires(self):
        """Return the upstream tasks, keyed by the role of their output."""
        day = self.date
        upstream = {
            'socdem': GetSocdemByCryptaId(day),
            'yandexuid_attributes': GetAdditionalYandexuidAttributes(day),
            'expanded_segments': CombineSegmentsByYandexuid(day),
        }
        upstream['old_nodes_cleaner'] = OldNodesByNameCleaner(
            day,
            folder=config.JOINED_YANDEXUID_PROFILES_YT_DIRECTORY,
            lifetime=config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
        )
        return upstream

    def output(self):
        """Return the target for the per-date joined profiles table."""
        table_path = os.path.join(config.JOINED_YANDEXUID_PROFILES_YT_DIRECTORY, self.date)
        return YtTarget(table_path)

    def run(self):
        """Create the output table, reduce the three inputs into it and sort it."""
        monitoring_name = self.__class__.__name__

        with TimeTracker(monitoring_name=monitoring_name), self.yt.Transaction():
            destination = self.output().table
            self.yt.create_empty_table(
                destination,
                schema=utils.crypta_yandexuid_profiles_schema,
            )

            inputs = self.input()
            # The order is significant: join_by_yandexuid tells the tables
            # apart by their index (0 - socdem, 1 - attributes, 2 - segments).
            source_tables = [
                inputs['socdem']['corrected_yandexuid_profiles'].table,
                inputs['yandexuid_attributes'].table,
                inputs['expanded_segments'].table,
            ]

            reduce_operation = self.yt.run_reduce(
                join_by_yandexuid,
                source_table=source_tables,
                destination_table=destination,
                reduce_by='yandexuid',
            )
            job_statistics = reduce_operation.get_job_statistics()
            self.logger.info(json.dumps(job_statistics))

            self.yt.run_sort(destination, sort_by='yandexuid')
