import pytz
from yt.wrapper import ypath_join
from datetime import timedelta
from dateutil.parser import parse
import logging

# Module-level logger; the functions in this module emit debug traces through it.
logger = logging.getLogger(__name__)

# Timezone attached to timestamps parsed from event-log table names and used
# to express the target interval.
# NOTE(review): presumably the zone Logfeller uses when naming tables - confirm.
LOGFELLER_TIMEZONE = pytz.timezone('Europe/Moscow')


def get_event_logs(yt_client, event_log_configs):
    """Describe every node found in the configured event-log folders.

    Args:
        yt_client: Client object; its ``list(path)`` method is used to get
            the names of the nodes inside each folder.
        event_log_configs: Iterable of dicts, each with an
            ``'event_log_folder'`` path and a ``'duration'`` that is added
            to a log's start to obtain its end.

    Returns:
        A list of dicts with ``'start'``, ``'end'`` and ``'path'`` keys, in
        config order and, within a config, in listing order.
    """
    described_logs = []
    for log_config in event_log_configs:
        folder = log_config['event_log_folder']
        for node_name in yt_client.list(folder):
            # The node name itself is parsed as the log's start timestamp and
            # then placed into the Logfeller timezone.
            begins_at = LOGFELLER_TIMEZONE.localize(parse(node_name))
            described_logs.append({
                'start': begins_at,
                'end': begins_at + log_config['duration'],
                'path': ypath_join(folder, node_name),
            })
    logger.debug('Event logs: %s', described_logs)
    return described_logs


def get_target_interval(yt_client, input_archive_path, duration):
    """Build the interval of length ``duration`` ending at the archive's creation.

    The end of the interval is the ``creation_time`` attribute of
    ``<input_archive_path>/yt_tables``, converted to the Logfeller timezone.

    Args:
        yt_client: Client object; its ``get_attribute(path, name)`` method
            is used to read ``creation_time``.
        input_archive_path: Path of the archive that contains ``yt_tables``.
        duration: Length of the interval; subtracted from the end to get
            the start.

    Returns:
        A dict with ``'start'`` and ``'end'`` keys.
    """
    yt_tables_path = ypath_join(input_archive_path, 'yt_tables')
    raw_creation_time = yt_client.get_attribute(yt_tables_path, 'creation_time')
    # The parsed value is stamped as UTC whatever offset the string carried.
    # NOTE(review): assumes creation_time is always reported in UTC - confirm.
    created_utc = parse(raw_creation_time).replace(tzinfo=pytz.utc)
    interval_end = created_utc.astimezone(LOGFELLER_TIMEZONE)
    interval_start = interval_end - duration
    logger.debug('Target interval: %s - %s', interval_start, interval_end)
    return dict(start=interval_start, end=interval_end)


def get_overlap(interval1, interval2):
    """Return the intersection of two intervals, or ``None`` if they are disjoint.

    Both arguments are mappings with ``'start'`` and ``'end'`` keys. The
    bounds are treated as inclusive, so intervals that merely share a
    boundary produce a zero-length overlap rather than ``None``.

    Returns:
        A dict with ``'start'`` and ``'end'`` keys, or ``None``.
    """
    intersects = (
        interval1['start'] <= interval2['end']
        and interval1['end'] >= interval2['start']
    )
    if not intersects:
        return None

    overlap_start = max(interval1['start'], interval2['start'])
    overlap_end = min(interval1['end'], interval2['end'])
    return dict(start=overlap_start, end=overlap_end)


def choose_closest_event_logs(yt_client, input_archive_path, event_log_folder, duration):
    """Select the event-log tables that cover the archive's target interval.

    The target interval is obtained from ``get_target_interval``. Candidate
    tables are listed from ``<event_log_folder>/1d`` (treated as one day
    long) and ``<event_log_folder>/stream/5min`` (treated as five minutes
    long). Every candidate that overlaps a still-uncovered part of the
    target interval is selected and the covered part is cut out of the
    remaining intervals. The first overlapping table longer than 23 hours
    ends the search immediately.

    A table that only touches an uncovered interval at a single boundary
    instant is not selected: it covers none of that interval, and an
    adjacent daily table would otherwise end the search without covering
    anything.

    Args:
        yt_client: Client object passed through to ``get_target_interval``
            and ``get_event_logs``.
        input_archive_path: Path of the archive whose creation time defines
            the end of the target interval.
        event_log_folder: Folder containing the ``1d`` and ``stream/5min``
            sub-folders.
        duration: Length of the target interval.

    Returns:
        Sorted list of the selected table paths.
    """
    intervals = [
        get_target_interval(yt_client, input_archive_path, duration)
    ]

    # The 1d folder has to be listed first; otherwise data already present in
    # a daily table could be selected again through the 5min tables.
    event_logs = get_event_logs(yt_client, [
        {
            'event_log_folder': ypath_join(event_log_folder, '1d'),
            'duration': timedelta(days=1),
        },
        {
            'event_log_folder': ypath_join(event_log_folder, 'stream/5min'),
            'duration': timedelta(minutes=5),
        },
    ])

    used_event_logs = set()

    # Fill the uncovered intervals with logs, one candidate at a time.
    for event_log in event_logs:
        is_daily_log = event_log['end'] - event_log['start'] > timedelta(hours=23)
        new_intervals = []
        for interval in intervals:
            overlap = get_overlap(event_log, interval)
            logger.debug('Overlap %s of log %s with interval %s', overlap, event_log, interval)

            # get_overlap reports intervals that merely share a boundary as a
            # zero-length overlap. Such a log covers nothing of a non-empty
            # interval, so it is treated as not overlapping. A zero-length
            # interval is still matched by any log that contains its instant.
            touches_only = (
                bool(overlap)
                and overlap['start'] == overlap['end']
                and interval['start'] != interval['end']
            )
            if not overlap or touches_only:
                # Nothing covered: keep the interval for the next candidates.
                new_intervals.append(interval)
                continue

            used_event_logs.add(event_log['path'])
            # The first overlapping daily log is considered sufficient.
            if is_daily_log:
                return sorted(used_event_logs)
            # Keep whatever part of the interval the log left uncovered.
            if interval['start'] != overlap['start']:
                new_intervals.append({
                    'start': interval['start'],
                    'end': overlap['start'],
                })
            if interval['end'] != overlap['end']:
                new_intervals.append({
                    'start': overlap['end'],
                    'end': interval['end'],
                })
        intervals = new_intervals
        logger.debug('Intermediate intervals: %s', intervals)

    return sorted(used_event_logs)
