import logging
from datetime import datetime, timedelta
from collections import defaultdict

from sandbox import sdk2
from sandbox.sandboxsdk.environments import PipEnvironment
from sandbox.projects.collections.mixins import YasmReportable, yt_path_to_yasm


class BorderDays:
    """Calendar-date borders computed from "N days before today" counts.

    Attributes:
        critical_day: date ``critical_day_count`` days before today.
        one_in_week_day: date ``one_in_week_day_count`` days before today.
        dead_day: date ``dead_day_count`` days before today.

    A count of ``None`` produces ``datetime.min.date()``, the earliest
    representable date, so no real date compares as older than that border.
    """

    def __init__(self, critical_day_count, one_in_week_day_count, dead_day_count):
        # Read the clock once: all three borders must be measured from the
        # same day even if construction happens to straddle midnight.
        today = datetime.now().date()
        self.critical_day = BorderDays.__get_date_before_days(critical_day_count, today)
        self.one_in_week_day = BorderDays.__get_date_before_days(one_in_week_day_count, today)
        self.dead_day = BorderDays.__get_date_before_days(dead_day_count, today)

    @staticmethod
    def __get_date_before_days(days, today=None):
        """Return the date ``days`` days before ``today``.

        Args:
            days: number of days to step back, or ``None`` for "no border".
            today: reference date; defaults to the current local date.

        Returns:
            ``datetime.min.date()`` when ``days`` is ``None``, otherwise
            ``today - timedelta(days=days)``.
        """
        if days is None:
            return datetime.min.date()
        if today is None:
            today = datetime.now().date()
        return today - timedelta(days=days)


class ByTimeFilter:
    """Chooses which dated backup entries should be deleted, based on age."""

    @staticmethod
    def __group_by_day(records):
        """Group entry names by the calendar date encoded in the name.

        Each record is converted with ``str()`` and must start with a
        ``%Y-%m-%d`` date, optionally followed by ``T`` and anything else.
        Records whose ``attributes['type']`` equals ``'link'`` are skipped.

        Args:
            records: iterable of objects that stringify to the entry name and
                expose an ``attributes`` mapping with a ``'type'`` key.

        Returns:
            ``defaultdict(list)`` mapping ``datetime.date`` to the list of
            entry names for that day, in input order.

        Raises:
            ValueError: if a non-link name does not start with a valid date.
        """
        day_map = defaultdict(list)
        for record in records:
            if record.attributes['type'] == 'link':
                continue
            name = str(record)
            # partition() yields the whole name when there is no 'T'; the
            # former find()-based slice used index -1 in that case and
            # silently chopped the last digit off the day.
            date_part = name.partition('T')[0]
            try:
                day = datetime.strptime(date_part, "%Y-%m-%d").date()
            except ValueError:
                logging.error("ByTimeFilter ERROR: Filter can't process directories names")
                raise
            day_map[day].append(name)
        return day_map

    @staticmethod
    def filter(records, border_days):
        """Return the names of the entries that should be removed.

        Per calendar day, the first matching rule applies:
          * older than ``border_days.dead_day``: every entry is removed;
          * older than ``border_days.one_in_week_day``: every entry is
            removed unless the day is a Monday (``weekday() == 0``), in which
            case all of that day's entries are kept;
          * older than ``border_days.critical_day``: all entries but the last
            one (in input order) are removed;
          * otherwise nothing is removed.

        Args:
            records: see ``__group_by_day``.
            border_days: object with ``dead_day``, ``one_in_week_day`` and
                ``critical_day`` date attributes.

        Returns:
            List of entry names to delete.

        Raises:
            ValueError: propagated from name parsing.
        """
        to_delete = []
        by_day_dumps = ByTimeFilter.__group_by_day(records)
        # items() works on both Python 2 and Python 3 (iteritems() does not).
        for day, day_dumps in by_day_dumps.items():
            if day < border_days.dead_day:
                to_delete.extend(day_dumps)
            elif day < border_days.one_in_week_day:
                if day.weekday() != 0:
                    to_delete.extend(day_dumps)
            elif day < border_days.critical_day:
                to_delete.extend(day_dumps[:-1])
        return to_delete


class CollectionsBackupThiningTask(sdk2.Task, YasmReportable):
    """Task that thins out dated backup entries under a YT directory.

    The children of ``Parameters.yt_base_directory`` are listed, the ones to
    drop are selected by ``ByTimeFilter`` using the configured day counts, and
    they are removed inside a single YT transaction. Afterwards
    ``_report_lag`` is called with a signal name built from the proxy and the
    base directory.
    """

    class Requirements(sdk2.Requirements):
        environments = [
            PipEnvironment('yandex-yt'),
        ]
        cores = 1
        ram = 1024

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(sdk2.Task.Parameters):
        yt_proxy = sdk2.parameters.String('YT proxy', required=True, default='banach')
        yt_base_directory = sdk2.parameters.String('YT base directory', required=True)
        monitoring_server_host = sdk2.parameters.String(
            'Monitoring server',
            default='monit.n.yandex-team.ru',
        )
        with sdk2.parameters.Group("Time limits") as limits_block:
            critical_day_count = sdk2.parameters.Integer(
                'Critical day count (older records will save only one backup for one day)',
                default=None
            )
            one_in_week_count = sdk2.parameters.Integer(
                'OneInWeek day count (older records will save only one backup in week)',
                default=None
            )
            dead_day_count = sdk2.parameters.Integer(
                'Dead day count (older records will be totally removed)',
                default=None
            )
        delete_link_targets = sdk2.parameters.Bool(
            'Delete link\'s targets',
            default=False,
            required=False,
        )

    @property
    def _critical_day_count(self):
        """Value of the ``critical_day_count`` task parameter."""
        return self.Parameters.critical_day_count

    @property
    def _dead_day_count(self):
        """Value of the ``dead_day_count`` task parameter."""
        return self.Parameters.dead_day_count

    @property
    def _one_in_week_day_count(self):
        """Value of the ``one_in_week_count`` task parameter."""
        return self.Parameters.one_in_week_count

    def get_path_to_dump(self, name):
        """Return the YT path of entry ``name`` inside the base directory."""
        # Imported lazily: the YT client is provided by the PipEnvironment
        # declared in Requirements.
        from yt.wrapper import ypath_join
        return ypath_join(self.Parameters.yt_base_directory, name)

    def get_dumps_records(self, client):
        """List the base directory, requesting the ``type`` attribute.

        Args:
            client: YT client used for the ``list`` call.

        Returns:
            Whatever ``client.list`` returns for the base directory.
        """
        return client.list(self.Parameters.yt_base_directory, attributes=['type'])

    def search_link_targets(self, client, path):
        """Yield ``target_path`` of every non-broken link found under ``path``.

        Nothing is yielded when ``path`` does not exist. Links whose
        ``broken`` attribute is truthy are logged and skipped.

        Args:
            client: YT client used for ``exists`` and ``search``.
            path: root path to search for link nodes.
        """
        if not client.exists(path):
            logging.info('Path %s does not exist', path)
            return
        links = client.search(
            root=path,
            node_type=['link'],
            attributes=['target_path', 'broken'],
        )
        for link in links:
            if link.attributes['broken']:
                logging.info('Link %s is broken. Skip.', str(link))
                continue
            yield link.attributes['target_path']

    @staticmethod
    def make_signal_name(cluster, path):
        """Build the signal name for the given cluster and YT path."""
        path = yt_path_to_yasm(path)
        return 'collections_backup_thining_{}_{}'.format(
            cluster, path,
        )

    def on_execute(self):
        """Select outdated entries, remove them in one transaction, report."""
        from yt.wrapper import YtClient

        yt_token = sdk2.Vault.data("YASAP", "yt-hahn-token")
        yt_proxy = self.Parameters.yt_proxy
        _yt = YtClient(
            proxy=yt_proxy,
            token=yt_token,
        )
        dumps_to_delete = ByTimeFilter.filter(
            self.get_dumps_records(client=_yt),
            border_days=BorderDays(
                self._critical_day_count,
                self._one_in_week_day_count,
                self._dead_day_count,
            ),
        )
        with _yt.Transaction():
            for name in dumps_to_delete:
                dump_path = self.get_path_to_dump(name)
                paths = [dump_path]
                # Link targets (YDB tables converted to the MongoDB format)
                # must be deleted atomically together with the dump itself.
                if self.Parameters.delete_link_targets:
                    # Search under this dump's own path. The targets are
                    # collected here, before anything is removed below.
                    paths.extend(self.search_link_targets(client=_yt, path=dump_path))
                for path in paths:
                    _yt.remove(path, recursive=True)
                    logging.info(path)
        self._report_lag(
            self.make_signal_name(
                self.Parameters.yt_proxy, self.Parameters.yt_base_directory
            ),
        )


# Module-level alias for the task class defined above.
# NOTE(review): presumably looked up by the Sandbox task loader — confirm.
__TASK__ = CollectionsBackupThiningTask
