# -*- coding: utf-8 -*-
import datetime
import logging

from yt.wrapper import YtClient

from travel.hotels.lib.python3.yt import ytlib

from travel.hotels.tools.dataset_curator.data import Dataset, CleanupAgeDetectionMode

LOG = logging.getLogger(__name__)


class DatasetCleanupRunner:
    """Removes outdated dataset versions from YT according to dataset cleanup rules.

    YT clients are created lazily, one per cluster, and reused between calls.
    """

    def __init__(self, yt_token: str):
        """
        :param yt_token: token passed to every YtClient created by this runner.
        """
        self.yt_token = yt_token
        # Cache of YtClient instances keyed by cluster (proxy) name.
        self.yt_clients = {}

    def _get_yt_client(self, yt_cluster: str) -> YtClient:
        """Return the cached client for ``yt_cluster``, creating it on first use."""
        if yt_cluster not in self.yt_clients:
            self.yt_clients[yt_cluster] = YtClient(proxy=yt_cluster, token=self.yt_token)
        return self.yt_clients[yt_cluster]

    @staticmethod
    def parse_time_from_name(name: str) -> datetime.datetime:
        """Parse a version node name into a naive datetime.

        Two formats are accepted: ``YYYY-MM-DD`` and ``YYYY-MM-DDTHH:MM:SSZ``.

        :raises ValueError: if ``name`` matches neither format.
        """
        try:
            return datetime.datetime.strptime(name, '%Y-%m-%d')
        except ValueError:
            return datetime.datetime.strptime(name, '%Y-%m-%dT%H:%M:%SZ')

    def cleanup_dataset(self, dataset: Dataset) -> None:
        """Apply every cleanup rule of ``dataset`` on every relevant cluster.

        For each rule the clusters are taken from the rule itself or, when the
        rule does not specify any, from the dataset. A version is kept when it
        is newer than ``keep_newer_than`` (if set) or is among the ``keep_last``
        newest versions; otherwise it (or only the rule's ``subpaths`` inside
        it) is removed.

        :raises Exception: if a rule uses a mode other than
            ``CleanupAgeDetectionMode.BY_NAME``.
        :raises ValueError: if a version node name cannot be parsed as a date.
        """
        if not dataset.cleanup_rules:
            LOG.debug(f'Dataset {dataset.name} has no cleanup rules')
            return

        LOG.debug(f'Starting cleanup for {dataset.name}')
        # Naive UTC "now": parse_time_from_name() returns naive datetimes, so the
        # comparison below must be naive as well. Replaces the deprecated utcnow().
        now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        for cleanup_rule in dataset.cleanup_rules:
            # Validate the mode once per rule and before any name is parsed, so a
            # misconfigured rule fails with this error rather than a parsing one.
            if cleanup_rule.mode != CleanupAgeDetectionMode.BY_NAME:
                raise Exception(f'Unknown cleanup mode: {cleanup_rule.mode}')

            # The age threshold depends only on the rule, not on cluster or version.
            min_date = None
            if cleanup_rule.keep_newer_than is not None:
                min_date = now - cleanup_rule.keep_newer_than

            yt_clusters = cleanup_rule.yt_clusters if cleanup_rule.yt_clusters is not None else dataset.yt_clusters
            for yt_cluster in yt_clusters:
                LOG.debug(f'Cleanup of {dataset.name}: Processing cluster "{yt_cluster}"')
                yt_client = self._get_yt_client(yt_cluster)
                self._cleanup_cluster(yt_client, yt_cluster, dataset, cleanup_rule, min_date)

    def _cleanup_cluster(self, yt_client: YtClient, yt_cluster: str, dataset: Dataset, cleanup_rule, min_date) -> None:
        """Apply one cleanup rule to the dataset versions found on one cluster.

        :param min_date: versions dated at or after this naive datetime are kept;
            ``None`` disables the age-based protection.
        """
        all_versions = dataset.get_dataset_versions(yt_client, ignore_max_age=True)
        # Parse every name exactly once; a malformed name aborts before anything
        # on this cluster is removed.
        dated_versions = [
            (version, self.parse_time_from_name(version.node_name))
            for version in all_versions
        ]
        newest_first = sorted(dated_versions, key=lambda item: item[1], reverse=True)
        # NOTE(review): if keep_last is None this slice keeps every version, so
        # nothing is ever removed -- confirm that is the intended behaviour.
        last_version_ids = [version.get_id() for version, _ in newest_first[:cleanup_rule.keep_last]]

        for dataset_version, dataset_date in dated_versions:
            if min_date is not None and dataset_date >= min_date:
                LOG.debug(f'Cleanup of {dataset.name}: Skipping {dataset_version.path}, {yt_cluster} ({dataset_date} >= {min_date})')
                continue
            if dataset_version.get_id() in last_version_ids:
                LOG.debug(f'Cleanup of {dataset.name}: Skipping {dataset_version.path}, {yt_cluster} (among last {cleanup_rule.keep_last})')
                continue
            self._remove_version(yt_client, yt_cluster, dataset, cleanup_rule, dataset_version)

    @staticmethod
    def _remove_version(yt_client: YtClient, yt_cluster: str, dataset: Dataset, cleanup_rule, dataset_version) -> None:
        """Remove a version node, or only the rule's subpaths inside it when they are set."""
        if cleanup_rule.subpaths is not None:
            paths = [ytlib.join(dataset_version.path, subpath) for subpath in cleanup_rule.subpaths]
            LOG.info(f'Cleanup of {dataset.name}, {yt_cluster}: Removing {paths}')
            for path in paths:
                yt_client.remove(path, recursive=True, force=True)
        else:
            LOG.info(f'Cleanup of {dataset.name}, {yt_cluster}: Removing {dataset_version.path}')
            yt_client.remove(dataset_version.path, recursive=True, force=True)
