import copy
import json
from os.path import join

import luigi
from yt.wrapper import with_context, create_table_switch

import crypta.lib.python.spine.consts.attributes as spine_attributes
import crypta.lib.python.yt.time_utils as yt_time_utils
from crypta.profile.lib import date_helpers
from crypta.profile.utils.config import config
from crypta.profile.utils.loggers import TimeTracker
from crypta.profile.utils.luigi_utils import YtTarget, YtDailyRewritableTarget, BaseYtTask, OldNodesByNameCleaner
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_statbox_crypta_profiles_log import GetStatboxCryptaProfilesLog
from crypta.profile.runners.export_profiles.lib.profiles_generation.get_crypta_id_profiles_log import GetCryptaIdProfilesLog


# Length of one day in seconds; multiplied by day counts to build timestamp thresholds.
SECONDS_IN_A_DAY = 60 * 60 * 24

# Output table indices used by the reducer when switching output tables.
# They have to match the order of the destination tables passed to run_reduce.
daily_export_table_index = 0
bb_storage_table_index = 1


@with_context
class BbStorageProcessor(object):
    """Reducer that merges the fresh profiles log with the stored profiles.

    Input tables (by ``context.table_index``):
        0 - fresh profiles log, 1 - current storage table.
    Output tables:
        ``daily_export_table_index`` - records for the daily export,
        ``bb_storage_table_index`` - records for the rewritten storage.

    Args:
        outdated_keyword_threshold: timestamp; a record whose ``update_time``
            is not newer than this value plus one hour is treated as outdated.
        outdated_shortterm_interests_threshold: timestamp; short-term interests
            with a timestamp not newer than this value are dropped.
    """

    def __init__(self, outdated_keyword_threshold, outdated_shortterm_interests_threshold):
        self.outdated_keyword_threshold = outdated_keyword_threshold
        self.outdated_shortterm_interests_threshold = outdated_shortterm_interests_threshold

    def _is_whole_profile_outdated(self, record):
        """Return True if ``record['update_time']`` is not newer than the threshold plus 3600 seconds."""
        return record['update_time'] <= self.outdated_keyword_threshold + 3600

    def _delete_old_shortterm_interests(self, record):
        """Drop expired short-term interests from ``record`` in place.

        The field is set to ``None`` when no interest survives. Records without
        the field (or with an empty/None value) are left untouched.
        """
        shortterm_interests = record.get('shortterm_interests')
        if not shortterm_interests:
            return

        fresh_interests = {
            interest_id: interest_timestamp
            for interest_id, interest_timestamp in shortterm_interests.items()
            if interest_timestamp > self.outdated_shortterm_interests_threshold
        }
        record['shortterm_interests'] = fresh_interests or None

    @staticmethod
    def get_daily_export_and_new_storage_record(profiles_record, storage_record):
        """Build the daily export record and the updated storage record.

        Neither input record is modified.

        Args:
            profiles_record: record from the fresh profiles log.
            storage_record: record with the same key from the storage table.

        Returns:
            Tuple ``(daily_export_record, new_storage_record)``:
            * ``daily_export_record`` is a copy of ``profiles_record`` without
              ``shortterm_interests``; it gets a ``fields_to_delete`` list with
              names of fields that are None in the profile but set in storage.
            * ``new_storage_record`` is a copy of ``storage_record`` overwritten
              with the profile values; ``shortterm_interests`` are merged
              keeping the larger timestamp for every segment.
        """
        fields_to_delete = []
        daily_export_record = copy.deepcopy(profiles_record)
        new_storage_record = copy.deepcopy(storage_record)

        for field_name, new_profile_value in profiles_record.items():
            storage_value = storage_record.get(field_name)

            if new_profile_value is None and storage_value is not None:
                fields_to_delete.append(field_name)

            if field_name == 'shortterm_interests':
                # Merge into the deep-copied dict so that the caller's
                # storage_record is not mutated through a shared reference.
                merged_interests = new_storage_record.get(field_name) or {}
                new_storage_record[field_name] = merged_interests
                if new_profile_value is not None:
                    for segment_id, segment_timestamp in new_profile_value.items():
                        merged_interests[segment_id] = max(
                            merged_interests.get(segment_id, 0),
                            segment_timestamp,
                        )
            elif field_name == '@table_index':
                # Service field, never copied into the storage record.
                pass
            else:
                new_storage_record[field_name] = new_profile_value

        if fields_to_delete:
            daily_export_record['fields_to_delete'] = fields_to_delete

        # Short-term interests are kept only in the storage record.
        daily_export_record.pop('shortterm_interests', None)

        return daily_export_record, new_storage_record

    def __call__(self, key, records, context):
        """Reduce all records of one key into export and storage records.

        Args:
            key: reduce key (unused).
            records: iterable of input records sharing the key.
            context: job context; ``context.table_index`` tells which input
                table the current record came from.

        Yields:
            Table switches followed by the records for the selected table.
        """
        profiles_record = None
        storage_record = None

        # If several records of one table share the key, the last one wins.
        for record in records:
            if context.table_index == 0:
                profiles_record = record
            elif context.table_index == 1:
                storage_record = record

        if profiles_record and not storage_record:
            # New key: store it as is (minus expired interests) and export it.
            bb_storage_record = copy.deepcopy(profiles_record)
            self._delete_old_shortterm_interests(bb_storage_record)
            yield create_table_switch(bb_storage_table_index)
            yield bb_storage_record

            profiles_record.pop('shortterm_interests', None)

            yield create_table_switch(daily_export_table_index)
            yield profiles_record

        elif profiles_record and storage_record:
            daily_export_record, new_storage_record = self.get_daily_export_and_new_storage_record(
                profiles_record,
                storage_record,
            )

            # An outdated profile is written to neither output table.
            if not self._is_whole_profile_outdated(daily_export_record):
                yield create_table_switch(daily_export_table_index)
                yield daily_export_record

                self._delete_old_shortterm_interests(new_storage_record)
                yield create_table_switch(bb_storage_table_index)
                yield new_storage_record

        elif not profiles_record and storage_record:
            # to take some emergency updates into account
            if not self._is_whole_profile_outdated(storage_record):
                self._delete_old_shortterm_interests(storage_record)

                yield create_table_switch(bb_storage_table_index)
                yield storage_record


class GetDailyExportAndProcessBbStorage(BaseYtTask):
    """Task that runs BbStorageProcessor over the profiles log and the storage table.

    It produces the daily export table for ``date`` and rewrites the storage
    table for the chosen ``data_source``.
    """
    date = luigi.Parameter()
    data_source = luigi.Parameter()  # yandexuid or crypta_id
    priority = 100
    task_group = 'export_profiles'

    def requires(self):
        """Return the profiles log task and the cleaner of old daily export nodes."""
        profiles_log_by_source = {
            'yandexuid': GetStatboxCryptaProfilesLog(self.date),
            'crypta_id': GetCryptaIdProfilesLog(self.date),
        }
        export_dir_by_source = {
            'crypta_id': config.CRYPTAID_DAILY_EXPORT_DIRECTORY,
            'yandexuid': config.YANDEXUID_DAILY_EXPORT_DIRECTORY,
        }

        profiles_log_task = profiles_log_by_source[self.data_source]
        cleaner_task = OldNodesByNameCleaner(
            self.date,
            folder=export_dir_by_source[self.data_source],
            lifetime=config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP,
        )

        return {
            'ProfilesLog': profiles_log_task,
            'Cleaner': cleaner_task,
        }

    def output(self):
        """Return targets for the storage table and the daily export table of ``date``."""
        export_dir_by_source = {
            'crypta_id': config.CRYPTAID_DAILY_EXPORT_DIRECTORY,
            'yandexuid': config.YANDEXUID_DAILY_EXPORT_DIRECTORY,
        }
        storage_table_by_source = {
            'crypta_id': config.CRYPTAID_EXPORT_PROFILES_14_DAYS_TABLE,
            'yandexuid': config.YANDEXUID_EXPORT_PROFILES_14_DAYS_TABLE,
        }

        storage_path = storage_table_by_source[self.data_source]
        daily_export_path = join(export_dir_by_source[self.data_source], self.date)

        return {
            'storage': YtDailyRewritableTarget(storage_path, date=self.date),
            'daily_export': YtTarget(daily_export_path),
        }

    def run(self):
        """Recreate both output tables and fill them with the reducer's output."""
        with TimeTracker('{}_{}'.format(self.__class__.__name__, self.data_source)):
            outputs = self.output()
            source_table = self.input()['ProfilesLog'].table
            storage_table = outputs['storage'].table
            daily_export_table = outputs['daily_export'].table

            # Both thresholds are counted back from noon of the task date.
            current_time = date_helpers.from_utc_date_string_to_noon_timestamp(self.date)
            storage_days = config.NUMBER_OF_INTERMEDIATE_PROFILES_TABLES_TO_KEEP
            interests_days = config.SHORTTERM_INTERESTS_EXPIRE_DAYS

            outdated_entry_threshold = current_time - SECONDS_IN_A_DAY * storage_days
            outdated_shortterm_interests_threshold = current_time - SECONDS_IN_A_DAY * interests_days
            self.logger.info('current_time = {} outdated_entry_threshold = {}'.format(current_time, outdated_entry_threshold))

            with self.yt.Transaction(), self.yt.TempTable() as temporary_storage_table:

                # table order in tuples matters!
                if self.yt.exists(storage_table):
                    # The storage table is recreated below, so the reducer
                    # reads its previous content from a temporary copy.
                    self.yt.copy(storage_table, temporary_storage_table, force=True)
                    source_tables = (source_table, temporary_storage_table)
                else:
                    # storage is empty at the first run
                    self.logger.error('Can not find storage table {table}'.format(table=storage_table))
                    source_tables = (source_table,)

                self.logger.info('processing bb storage')

                for input_table in source_tables:
                    self.yt.sort_if_needed(input_table, sort_by=self.data_source)

                source_table_attributes = self.yt.get_table_attributes(source_table)
                basic_table_schema = source_table_attributes['schema']

                daily_export_schema = [
                    {'name': 'fields_to_delete', 'type': 'any'},
                ]

                # NOTE: 'sort_order' is removed from the column descriptions in
                # place, so the columns inside basic_table_schema (used for the
                # storage table below) lose it as well.
                for column in basic_table_schema:
                    if column['name'] == 'shortterm_interests':
                        continue
                    column.pop('sort_order', None)
                    daily_export_schema.append(column)

                self.yt.create_empty_table(
                    storage_table,
                    schema=basic_table_schema,
                    additional_attributes={
                        'not_strict_bb_keyword_id_to_field_names': source_table_attributes['not_strict_bb_keyword_id_to_field_names'],
                        'bb_keywords_to_field_name': source_table_attributes['bb_keywords_to_field_name'],
                    },
                    erasure=False,
                )
                self.yt.create_empty_table(
                    daily_export_table,
                    schema=daily_export_schema,
                )

                # The order matches daily_export_table_index / bb_storage_table_index.
                destination_tables = (daily_export_table, storage_table)
                reducer = BbStorageProcessor(
                    outdated_keyword_threshold=outdated_entry_threshold,
                    outdated_shortterm_interests_threshold=outdated_shortterm_interests_threshold,
                )
                operation = self.yt.run_reduce(
                    reducer,
                    source_table=source_tables,
                    destination_table=destination_tables,
                    reduce_by=self.data_source,
                )
                self.logger.info(json.dumps(operation.get_job_statistics()))
                self.yt.set_attribute(storage_table, 'generate_date', self.date)

                for output_table in destination_tables:
                    self.yt.sort_if_needed(output_table, sort_by=self.data_source)
                    self.yt.set_attribute(
                        output_table,
                        spine_attributes.GENERATE_DATETIME,
                        yt_time_utils.get_current_yt_timestamp(),
                    )