#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys

import luigi

from crypta.profile.lib import date_helpers

from crypta.profile.utils.config import config
from crypta.profile.utils.utils import segment_storage_by_id_schema
from crypta.profile.utils.loggers import TimeTracker
from crypta.profile.utils.segment_storage import ProfileBuilder
from crypta.profile.utils.luigi_utils import YtTarget, BaseYtTask, OldNodesByNameCleaner

from crypta.profile.runners.export_profiles.lib.profiles_generation.get_regular_lal import GetRegularLal


# Number of seconds in 24 hours.
SECONDS_IN_A_DAY = 60 * 60 * 24
# Upper bound on sys.getsizeof(id) + sys.getsizeof(id_type); StorageBuilderById
# emits nothing for keys above it.
# NOTE(review): presumably chosen to stay under a storage key-size limit -- confirm.
MAX_KEY_SIZE = 15 * 1024


class StorageBuilderById(ProfileBuilder):
    """Callable that folds all rows sharing an (id, id_type) key into one record.

    An instance is passed as the reduce step to ``run_map_reduce`` in
    ``GetDailySegmentsById.run``.
    """

    def __init__(self, update_time):
        # Value stored in the 'update_time' field of every emitted record.
        self.update_time = update_time

    def __call__(self, key, rows):
        """Yield at most one merged record for ``key``.

        ``key`` must provide 'id' and 'id_type'; ``rows`` is handed unchanged to
        ``combine_segments``. Nothing is yielded when the combined
        ``sys.getsizeof`` of the two key fields exceeds ``MAX_KEY_SIZE``.

        NOTE(review): ``sys.getsizeof`` is shallow and includes per-object
        overhead, so the check only approximates the byte length of the key.
        """
        record_id = key['id']
        record_id_type = key['id_type']

        # Guard clause: oversized keys are skipped before any segment merging.
        key_size = sys.getsizeof(record_id) + sys.getsizeof(record_id_type)
        if key_size > MAX_KEY_SIZE:
            return

        merged = {'id': record_id, 'id_type': record_id_type}
        # Fields returned by combine_segments are layered on top of the key
        # fields; 'update_time' is assigned last so it always wins.
        merged.update(self.combine_segments(rows))
        merged['update_time'] = self.update_time

        yield merged


class GetDailySegmentsById(BaseYtTask):
    """Luigi task that reduces segment part tables into one daily table per data source.

    The result is written to
    ``<DAILY_MERGED_SEGMENTS_BY_ID_YT_DIRECTORY>/<data_source>/<date>`` and is
    sorted by ('id', 'id_type').
    """

    date = luigi.Parameter()
    data_source = luigi.Parameter()

    priority = 100
    task_group = 'export_profiles'

    def requires(self):
        """Return upstream tasks: the regular LAL task and a cleaner for the output folder."""
        lal_task = GetRegularLal(self.date)
        cleaner_task = OldNodesByNameCleaner(
            self.date,
            folder=os.path.join(config.DAILY_MERGED_SEGMENTS_BY_ID_YT_DIRECTORY, self.data_source),
            lifetime=config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
        )
        return {'lal': lal_task, 'cleaner': cleaner_task}

    def output(self):
        """Return the YT target for this task's data source and date."""
        table_path = os.path.join(
            config.DAILY_MERGED_SEGMENTS_BY_ID_YT_DIRECTORY,
            self.data_source,
            self.date,
        )
        return YtTarget(table_path)

    def run(self):
        """Collect input tables, reduce them by (id, id_type) and sort the result.

        Input selection under ``PROFILES_SEGMENT_PARTS_YT_DIRECTORY`` (tables and
        links only, the 'indevice' subdirectory excluded from this first search):

        * data_source == 'indevice': nodes whose 'indevice' attribute is set,
          plus every table/link found under the 'indevice' subdirectory;
        * any other data_source: nodes whose 'indevice' attribute is not set.

        All YT calls are issued inside a single transaction.
        """
        monitoring_name = '{}_{}'.format(self.__class__.__name__, self.data_source)

        with TimeTracker(monitoring_name=monitoring_name):
            with self.yt.Transaction():
                parts_dir = config.PROFILES_SEGMENT_PARTS_YT_DIRECTORY
                indevice_dir = os.path.join(parts_dir, 'indevice')
                is_indevice = self.data_source == 'indevice'

                def matches_source(node):
                    # Keep a node when the presence of its 'indevice' attribute
                    # agrees with the requested data source.
                    has_flag = node.attributes.get('indevice') is not None
                    return has_flag == is_indevice

                input_tables = list(self.yt.search(
                    parts_dir,
                    node_type=['table', 'link'],
                    attributes=['indevice'],
                    object_filter=matches_source,
                    exclude=[indevice_dir],
                ))

                if is_indevice:
                    # The dedicated subdirectory is taken wholesale, without
                    # attribute filtering.
                    input_tables.extend(
                        self.yt.search(indevice_dir, node_type=['table', 'link'])
                    )

                self.logger.info('Input tables: {}'.format(input_tables))

                destination = self.output().table
                self.yt.create_empty_table(destination, schema=segment_storage_by_id_schema)

                reducer = StorageBuilderById(
                    date_helpers.from_utc_date_string_to_noon_timestamp(self.date),
                )
                self.yt.run_map_reduce(
                    None,  # no map step
                    reducer,
                    input_tables,
                    destination,
                    reduce_by=['id', 'id_type'],
                )

                self.yt.run_sort(destination, sort_by=['id', 'id_type'])
