from __future__ import unicode_literals

import json
import time
import bisect
import logging
import textwrap
import datetime as dt
import functools as ft
import itertools as it
import collections

import six
# noinspection PyUnresolvedReferences,PyPackageRequirements
import setproctitle
import botocore.exceptions
import requests.exceptions
# noinspection PyUnresolvedReferences
import requests.packages.urllib3

from sandbox.common import os as common_os
from sandbox.common import log as common_log
from sandbox.common import abc as common_abc
from sandbox.common import mds as common_mds
from sandbox.common import tvm as common_tvm
from sandbox.common import rest as common_rest
from sandbox.common import config as common_config
from sandbox.common import format as common_format
from sandbox.common import patterns as common_patterns
from sandbox.common import itertools as common_it
from sandbox.common import statistics as common_statistics
from sandbox.common.types import resource as ctr
from sandbox.common.types import statistics as cts
from sandbox.common.types import notification as ctn

from sandbox.yasandbox.database import mapping

from sandbox.services.base import service

logger = logging.getLogger(__name__)


def update_progress(progress, parts=3):
    """
    Replace the tail of the current process title with a progress marker.

    :param progress: string to put as the last component of the title
    :param parts: number of leading title components to keep
    """
    delimiter = common_os.PROCESS_TITLE_DELIMITER
    kept = setproctitle.getproctitle().split(delimiter, parts)[:parts]
    kept.append(progress)
    setproctitle.setproctitle(delimiter.join(kept))


class ResourceInfo(
    collections.namedtuple(
        "ResourceInfo",
        "id size multifile mds hosts_states executable path skynet_id time__updated state type owner"
    )
):
    """
    Read-only record with the resource fields used by the cleaner.
    """

    @classmethod
    def from_db(cls, *args):
        """
        Build a record from raw database values given in `_fields` order.

        :param args: values for all fields, the second one (size) expressed in KiB
        :return: instance of `cls` with the size converted to bytes
        """
        values = list(args)
        values[1] <<= 10  # resource size saved in KiB
        # instantiate `cls` instead of the hard-coded class so that subclasses get their own type back
        return cls(*values)


class GCState(common_patterns.Abstract):
    """
    Resumable state of garbage collection in a single bucket plus the statistics gathered along the way.
    """

    __slots__ = (
        "bucket", "current_key", "rid", "keys", "meta_key", "sizes", "skip", "clean", "last_keys",
        # statistics
        "removed_count", "removed_size", "total_keys", "checked_count", "checked_size", "meta_size", "index_size",
    )
    __defs__ = (
        None, "", None, set(), None, {}, False, False, [],
        0, 0, 0, 0, 0, 0, 0,
    )

    def next_resource(self, rid):
        """ Switch to resource `rid` and re-initialize all per-resource fields via `_value` without arguments """
        # noinspection PyAttributeOutsideInit
        self.rid = rid
        per_resource = ("keys", "meta_key", "sizes", "skip", "last_keys")
        for index, name in enumerate(self.__slots__):
            if name not in per_resource:
                continue
            setattr(self, name, self._value(name, index, (), {}))

    # noinspection PyAttributeOutsideInit
    def skip_resource(self):
        """ Mark the current resource as skipped and forget the keys collected for it """
        self.keys = set()
        self.skip = True

    def detect_meta_info(self, key, size):
        """
        Account `size` of `key` as metadata or as an index, judging by the three most recently seen keys.

        A key without "/" counts as metadata. A key equal to the previous key plus ".index" counts as an index
        when the key before the previous one is numeric and is the first path component of the previous key.
        """
        recent = self.last_keys
        recent.append(key)
        if len(recent) > 3:
            del recent[0]
        if "/" not in key:
            self.meta_size += size
            return
        if len(recent) != 3:
            return
        rid_key, data_key = recent[0], recent[1]
        if rid_key.isdigit() and data_key.startswith(rid_key + "/") and key == data_key + ".index":
            self.index_size += size

    def dump(self):
        """ Return the state as a dict of plain containers (the set and the dict are turned into lists) """
        snapshot = {name: getattr(self, name) for name in self.__slots__}
        snapshot["keys"] = list(self.keys)
        snapshot["sizes"] = list(six.iteritems(self.sizes))
        return snapshot

    @classmethod
    def load(cls, data):
        """ Restore the state from the result of `dump`; `None` gives a state built without arguments """
        if data is None:
            return cls()
        restorers = {"keys": set, "sizes": dict}
        values = []
        for name in cls.__slots__:
            value = data[name]
            if name in restorers:
                value = restorers[name](value)
            values.append(value)
        return cls(*values)

    @property
    def progress(self):
        """ Percentage of checked keys as a string, "NaN%" if the total number of keys is zero """
        if self.total_keys:
            percent = self.checked_count * 100 // self.total_keys
        else:
            percent = "NaN"
        return "{}%".format(percent)

    def format_stat(self):
        """ One-line human readable summary of the collected statistics """
        size2str = common_format.size2str
        template = "Checked {} ({}) / {} ({}) | Removed {} ({}) | Metadata size: {} | Tarball indexes size: {}"
        return template.format(
            self.checked_count, size2str(self.checked_size),
            self.total_keys, self.progress,
            self.removed_count, size2str(self.removed_size),
            size2str(self.meta_size),
            size2str(self.index_size),
        )


def patched_requests_urllib(method):
    """
    Decorator that puts urllib3's `allowed_gai_family` back after the wrapped call.

    The current function is remembered right before `method` is invoked and re-assigned once the call is over,
    no matter whether it returned or raised.
    """
    @ft.wraps(method)
    def wrapper(*args, **kwargs):
        # noinspection PyUnresolvedReferences
        saved_gai_family = requests.packages.urllib3.util.connection.allowed_gai_family
        try:
            result = method(*args, **kwargs)
        finally:
            # Restore original function since it could be modified during multi-threaded usage of skyboned library
            # noinspection PyUnresolvedReferences
            requests.packages.urllib3.util.connection.allowed_gai_family = saved_gai_family
        return result
    return wrapper


# noinspection PyDefaultArgument
def set_process_title(method, main_proc_title=[]):
    """
    Decorator that renames the process to "<main title><delimiter><method name><delimiter><start time>".

    `main_proc_title` is a deliberately shared mutable default: it caches the process title observed
    on the first decorated call, so all later titles are built from the same base.
    """
    @ft.wraps(method)
    def wrapper(*args, **kwargs):
        if not main_proc_title:
            main_proc_title.append(setproctitle.getproctitle())
        started_at = dt.datetime.now().strftime("%Y-%m-%d %T")
        title_parts = (main_proc_title[0], method.__name__, started_at)
        setproctitle.setproctitle(common_os.PROCESS_TITLE_DELIMITER.join(title_parts))
        return method(*args, **kwargs)
    return wrapper


class MDSCleaner(service.MultiprocessedService):
    """
    Removes deleted and broken resources from S3 MDS buckets.
    Implements LRU policy for resources in common MDS quota.
    """

    tick_interval = 60
    notification_timeout = 60

    # Share of the default bucket's max size allowed to be used; everything above it is subject to removal
    EXCESS_DEFAULT_BUCKET_SIZE_THRESHOLD = (1 + 5 ** 0.5) / 2 - 1  # Phi - 1
    # Not ready/broken/deleted resources updated earlier than this are removed from MDS
    BROKEN_RESOURCES_TTL = 1  # in days
    # Limit of resources fetched from the database by a single cleanup query
    MAX_RESOURCES_AT_ONCE = 20000
    # Minimal number of hosts in OK state required to remove a READY resource from MDS
    MIN_RESOURCE_SOURCES = 1
    # Sizes passed to the workers pool which removes resources from MDS
    REMOVE_PROCESS_POOL_SIZE = 20
    REMOVE_THREAD_POOL_SIZE = 5
    # Maximal number of buckets being garbage collected at once
    GC_WORKERS_POOL_SIZE = 20
    # Objects modified more recently than this are not touched by garbage collection
    GC_DELAY = 24  # hours

    # Notifications about a user bucket are sent once used space reaches this share of its max size
    USER_BUCKET_QUOTA_WARNING_THRESHOLD = 0.9
    # Minimal time between two quota checks, in seconds
    CHECK_USER_BUCKET_QUOTA_INTERVAL = 86400

    # Buckets excluded from the list of buckets to collect garbage in
    BUCKETS_WITHOUT_GC = ("sandbox-backup",)

    def __init__(self, *args, **kwargs):
        """ Initialize the base service and switch the root logger to INFO level """
        super(MDSCleaner, self).__init__(*args, **kwargs)
        logging.getLogger().setLevel(logging.INFO)

    @property
    def targets(self):
        """ List of stateful targets, one per periodic job of the service, each with its own interval """
        schedule = (
            (self.cleanup_non_ready_resources, 600),
            (self.cleanup_excess_default_bucket_data, 300),
            (self.cleanup_garbage, 21600),
            (self.check_quotas, self.CHECK_USER_BUCKET_QUOTA_INTERVAL),
        )
        result = []
        for job, period in schedule:
            result.append(self.Target(function=job, interval=period, log_execution=True, stateful=True))
        return result

    @staticmethod
    def _calculate_excess_bucket_data_size(bucket, threshold, thread_logger):
        """
        Calculate how much data in `bucket` is stored above the allowed share of its maximal size.

        :param bucket: name of the bucket to request statistics for
        :param threshold: share of the bucket's `max_size` allowed to be used
        :param thread_logger: logger to report to
        :return: size above the limit; 0 if the limit is not exceeded, `max_size` is not set or any error occurred
        """
        excess = 0
        # noinspection PyBroadException
        try:
            idm_url = common_config.Registry().common.mds.s3.idm.url
            stats = common_rest.Client(base_url=idm_url).stats.buckets[bucket][:]
            max_size = stats.get("max_size")
            used_space = stats.get("used_space")
            if max_size is None:
                thread_logger.warning("max_size for bucket '%s' is not set, skipping", bucket)
            else:
                limit = int(max_size * threshold)
                thread_logger.info(
                    "Bucket %s: used %s from %s, limit %s",
                    bucket,
                    common_format.size2str(used_space),
                    common_format.size2str(max_size),
                    common_format.size2str(limit)
                )
                if used_space > limit:
                    excess = used_space - limit
        except Exception:
            # best effort: any failure is logged and treated as "nothing to remove"
            thread_logger.exception("Error while calculating extra bucket size")
        return excess

    def _remove_from_mds(self, res, tvm_ticket, deadline, thread_logger, send_signal=True):
        """
        Remove data of a single resource from SkyboneD (if it has skynet id) and from S3 MDS,
        then unset MDS related fields of the resource in the database.

        :param res: `ResourceInfo` of the resource to remove
        :param tvm_ticket: service ticket passed to SkyboneD
        :param deadline: UTC moment starting from which nothing is done
        :param thread_logger: logger to report to
        :param send_signal: push a statistics signal about the removal on success
        :return: `True` if the data was removed, `False` on removal error or if the resource has an old MDS key,
            `None` if the service is stopping or the deadline is reached
        """
        if self.stop_event.is_set() or dt.datetime.utcnow() >= deadline:
            return
        mds_key = res.mds["k"]
        rid = res.id
        updated = res.time__updated
        if mds_key.split("/")[0] != str(rid):
            # key is not prefixed with the resource id: only forget MDS info, do not touch the storage
            thread_logger.warning("Resource #%s has old MDS key", rid)
            mapping.Resource.objects.with_id(rid).update(unset__mds=True, unset__force_cleanup=True)
            return False
        thread_logger.info(
            "Removing resource #%s (updated at %s) with size %s from S3 bucket %s",
            rid, updated, common_format.size2str(res.size), res.mds.get("n", ctr.DEFAULT_S3_BUCKET)
        )
        # noinspection PyUnresolvedReferences
        try:
            if res.skynet_id:
                try:
                    common_mds.S3().skyboned_remove(res.skynet_id, rid, tvm_ticket, logger=thread_logger)
                except common_mds.S3.RBTorrentForbidden as ex:
                    thread_logger.warning("Error while removing resource #%s from SkyboneD: %s", rid, ex)
                else:
                    thread_logger.info("Resource #%s (updated at %s) removed from SkyboneD", rid, updated)
            bucket = res.mds.get("n")
            if res.multifile:
                # all uploaded keys of the resource followed by the key named after the resource id itself
                all_keys = it.chain(common_mds.S3().uploaded_keys(bucket, rid), [str(rid)])
                for keys_chunk in common_it.grouper(all_keys, 1000):
                    common_mds.S3().delete(keys_chunk, False, namespace=bucket, logger=thread_logger)
            else:
                common_mds.S3().delete(mds_key, False, namespace=bucket, logger=thread_logger)
            thread_logger.info("Resource #%s (updated at %s) removed from MDS", rid, updated)
        except (
            common_mds.S3.Exception, botocore.exceptions.ClientError, requests.exceptions.ConnectionError, RuntimeError
        ) as ex:
            thread_logger.error("Error while removing resource #%s from MDS: %s", rid, ex)
            return False
        mapping.Resource.objects.with_id(rid).update(unset__mds=True, unset__force_cleanup=True)
        thread_logger.info("Resource #%s (updated at %s) updated in Sandbox", rid, updated)
        if not send_signal:
            return True
        now = dt.datetime.utcnow()
        common_statistics.Signaler().push(dict(
            type=cts.SignalType.RESOURCES_REMOVED_FROM_MDS,
            date=now,
            timestamp=now,
            bucket=res.mds.get("n", ctr.DEFAULT_S3_BUCKET),
            delay=(now - updated).total_seconds(),
            state=res.state,
            resource_type=res.type,
            owner=res.owner
        ))
        return True

    def __remove_from_mds(self, workers_pool, jobs, res, deadline, thread_logger):
        """
        Schedule removal of `res` in the workers pool, collecting results of finished jobs until it is spawned.

        :param workers_pool: pool to spawn the removal job in
        :param jobs: mapping of job id to the resource processed by that job, updated in place
        :param res: resource to schedule the removal of
        :param deadline: UTC moment to give up at, `None` means no deadline
        :param thread_logger: logger to report to
        :return: tuple of (id of the spawned job or `None` if nothing was spawned,
            number of resources removed by the collected jobs, total size of these resources)
        """
        removed = 0
        removed_size = 0
        job_id = None
        while job_id is None:
            if self.stop_event.is_set() or (deadline is not None and dt.datetime.utcnow() >= deadline):
                break
            for ready_job_id in workers_pool.ready_jobs():
                finished = jobs.pop(ready_job_id)
                # noinspection PyBroadException
                try:
                    if workers_pool.result(ready_job_id):
                        removed += 1
                        removed_size += finished.size
                except Exception:
                    # report the resource of the failed job, not the one which is being scheduled now
                    thread_logger.exception("Unexpected error while removing resource #%s", finished.id)
            # NOTE(review): presumably `spawn` gives `None` when there is no free worker - confirm
            job_id = workers_pool.spawn(res)
            if job_id is not None:
                jobs[job_id] = res
        return job_id, removed, removed_size

    @staticmethod
    def __wait_jobs(workers_pool, jobs, thread_logger):
        """
        Wait for the jobs left in `jobs` and collect their results.

        Waiting is limited to one second per job, but not less than 15 seconds in total.

        :param workers_pool: pool the jobs were spawned in
        :param jobs: mapping of job id to the resource processed by that job, finished jobs are removed from it
        :param thread_logger: logger to report to
        :return: tuple of (number of removed resources, total size of removed resources)
        """
        removed = 0
        removed_size = 0
        deadline = time.time() + max(len(jobs), 15)
        thread_logger.info("Waiting for complete of %s job(s)", len(jobs))
        while jobs and time.time() < deadline:
            ready_jobs = workers_pool.ready_jobs()
            for job_id in ready_jobs:
                finished = jobs.pop(job_id)
                # noinspection PyBroadException
                try:
                    if workers_pool.result(job_id):
                        removed += 1
                        removed_size += finished.size
                except Exception:
                    # the helper serves several cleanup targets, so name the failed resource instead of a target
                    thread_logger.exception("Unexpected error while removing resource #%s", finished.id)
            if not ready_jobs:
                time.sleep(1)
        return removed, removed_size

    def _cleanup_excess_bucket_data(self, excess_size, tvm_ticket, deadline, bucket, thread_logger):
        """
        Remove READY resources of `bucket` from MDS, starting from the earliest created ones,
        until removal of at least `excess_size` of data is scheduled.

        Resources with less than `MIN_RESOURCE_SOURCES` hosts in OK state are left intact.
        At most `MAX_RESOURCES_AT_ONCE` resources are examined per call.

        :param excess_size: size of data to remove, nothing is done if it is not positive
        :param tvm_ticket: service ticket passed to SkyboneD
        :param deadline: UTC moment to stop at
        :param bucket: name of the bucket to clean up
        :param thread_logger: logger to report to
        """
        if excess_size <= 0:
            return

        thread_logger.info("Need to remove resources from bucket %s with total size %s", bucket, excess_size)
        title = "{}{}remove_from_mds".format(
            setproctitle.getproctitle().rsplit(common_os.PROCESS_TITLE_DELIMITER, 1)[0],
            common_os.PROCESS_TITLE_DELIMITER,
        )
        workers_pool = common_os.WorkersPool(
            lambda r: self._remove_from_mds(r, tvm_ticket, deadline, thread_logger, send_signal=False),
            self.REMOVE_PROCESS_POOL_SIZE, self.REMOVE_THREAD_POOL_SIZE, title=title
        )
        workers_pool.start()
        total_removed = 0
        total_removed_size = 0
        # NOTE(review): the pool is stopped on the successful path only - confirm this is fine when the loop raises
        try:
            jobs = {}
            # noinspection PyProtectedMember
            for res in six.moves.map(
                lambda _: ResourceInfo.from_db(*_),
                mapping.Resource.objects(
                    mds__exists=True,
                    mds__namespace=bucket,
                    state=ctr.State.READY,
                    read_preference=mapping.ReadPreference.SECONDARY,
                ).order_by("time__created").limit(self.MAX_RESOURCES_AT_ONCE).fast_scalar(*ResourceInfo._fields)
            ):
                num_sources = len([h for h in res.hosts_states if h["st"] == ctr.HostState.OK])
                if num_sources < self.MIN_RESOURCE_SOURCES:
                    # the message placeholder is the resource id (the number of sources used to be logged here)
                    thread_logger.warning(
                        "Cannot remove resource #%s from MDS due insufficient additional sources", res.id
                    )
                    continue
                job_id, removed, removed_size = self.__remove_from_mds(workers_pool, jobs, res, deadline, thread_logger)
                total_removed += removed
                total_removed_size += removed_size
                if job_id is not None:
                    # the size is accounted as soon as the removal is scheduled, not when it is completed
                    excess_size -= res.size
                if excess_size <= 0:
                    break
                if self.stop_event.is_set() or (deadline is not None and dt.datetime.utcnow() >= deadline):
                    break
            removed, removed_size = self.__wait_jobs(workers_pool, jobs, thread_logger)
            total_removed += removed
            total_removed_size += removed_size
            workers_pool.stop()
        finally:
            thread_logger.info(
                "Removed %s resources with total size %s from bucket %s",
                total_removed, common_format.size2str(total_removed_size), bucket
            )

    @set_process_title
    @patched_requests_urllib
    def cleanup_excess_default_bucket_data(self, state):
        """
        Target: free the default S3 bucket from the data stored above `EXCESS_DEFAULT_BUCKET_SIZE_THRESHOLD`.

        :param state: target state, its `interval` limits the duration of the run
        :return: tuple of (`None`, empty list, the same `state`)
        """
        own_name = self.cleanup_excess_default_bucket_data.__name__
        thread_logger = common_log.MessageAdapter(
            logger.getChild(own_name), fmt="[{}] %(message)s".format(own_name)
        )
        tvm_service = self.sandbox_config.common.mds.skyboned.tvm_service
        tvm_ticket = common_tvm.TVM.get_service_ticket([tvm_service])[tvm_service]
        deadline = dt.datetime.utcnow() + dt.timedelta(seconds=state.interval)
        bucket = ctr.DEFAULT_S3_BUCKET
        excess_size = self._calculate_excess_bucket_data_size(
            bucket, self.EXCESS_DEFAULT_BUCKET_SIZE_THRESHOLD, thread_logger
        )
        self._cleanup_excess_bucket_data(excess_size, tvm_ticket, deadline, bucket, thread_logger)
        return None, [], state

    @set_process_title
    @patched_requests_urllib
    def cleanup_non_ready_resources(self, state):
        """
        Target: remove from MDS the data of broken/deleted resources marked for forced cleanup and
        of not ready/broken/deleted resources not updated for `BROKEN_RESOURCES_TTL` days.

        :param state: target state, its `interval` limits the duration of the run
        :return: tuple of (`None`, empty list, the same `state`)
        """
        own_name = self.cleanup_non_ready_resources.__name__
        thread_logger = common_log.MessageAdapter(
            logger.getChild(own_name), fmt="[{}] %(message)s".format(own_name)
        )

        tvm_service = self.sandbox_config.common.mds.skyboned.tvm_service
        tvm_ticket = common_tvm.TVM.get_service_ticket([tvm_service])[tvm_service]
        deadline = dt.datetime.utcnow() + dt.timedelta(seconds=state.interval)

        thread_logger.info("Removing outdated not ready resources from MDS")

        def earliest_updated(**conditions):
            """ Rows of resources stored in MDS matching `conditions`, the earliest updated go first """
            # noinspection PyProtectedMember
            return mapping.Resource.objects(
                mds__exists=True,
                read_preference=mapping.ReadPreference.SECONDARY,
                **conditions
            ).order_by("time__updated").limit(self.MAX_RESOURCES_AT_ONCE).fast_scalar(*ResourceInfo._fields)

        force_cleanup_query = earliest_updated(
            state__in=[ctr.State.BROKEN, ctr.State.DELETED],
            force_cleanup=True,
        )
        normal_cleanup_query = earliest_updated(
            state__in=[ctr.State.NOT_READY, ctr.State.BROKEN, ctr.State.DELETED],
            force_cleanup__ne=True,  # could be None
            time__updated__lte=dt.datetime.utcnow() - dt.timedelta(days=self.BROKEN_RESOURCES_TTL),
        )

        base_title = setproctitle.getproctitle().rsplit(common_os.PROCESS_TITLE_DELIMITER, 1)[0]
        title = "{}{}remove_from_mds".format(base_title, common_os.PROCESS_TITLE_DELIMITER)
        workers_pool = common_os.WorkersPool(
            lambda r: self._remove_from_mds(r, tvm_ticket, deadline, thread_logger),
            self.REMOVE_PROCESS_POOL_SIZE, self.REMOVE_THREAD_POOL_SIZE, title=title
        )
        thread_logger.info("Starting workers")
        workers_pool.start()
        total_removed = 0
        total_removed_size = 0
        try:
            jobs = {}
            # forced cleanup goes first, both queries together are capped by the common limit
            candidates = it.chain(force_cleanup_query, normal_cleanup_query)
            for row in it.islice(candidates, self.MAX_RESOURCES_AT_ONCE):
                res = ResourceInfo.from_db(*row)
                job_id, removed, removed_size = self.__remove_from_mds(workers_pool, jobs, res, deadline, thread_logger)
                total_removed += removed
                total_removed_size += removed_size
                if self.stop_event.is_set() or (deadline is not None and dt.datetime.utcnow() >= deadline):
                    break
            removed, removed_size = self.__wait_jobs(workers_pool, jobs, thread_logger)
            total_removed += removed
            total_removed_size += removed_size
            thread_logger.info("Stopping workers")
            workers_pool.stop()
        finally:
            thread_logger.info(
                "Removed %s not ready resources with total size %s",
                total_removed, common_format.size2str(total_removed_size)
            )
        return None, [], state

    @staticmethod
    def _notify_about_quota(abc_service, group_names, bucket, bucket_stats, warning, thread_logger):
        """
        Create an HTML e-mail notification about (nearly) exhausted space in the bucket of an ABC service.

        :param abc_service: ABC service owning the bucket
        :param group_names: names of Sandbox groups linked to the service
        :param bucket: name of the bucket
        :param bucket_stats: dict with `used_space` and `max_size` of the bucket
        :param warning: `True` if the space is about to end, `False` if it is already exceeded
        :param thread_logger: logger to report to
        """
        used_space = bucket_stats["used_space"]
        max_size = bucket_stats["max_size"]
        max_size_str = common_format.size2str(max_size)
        used_space_str = common_format.size2str(used_space)
        free_space_str = common_format.size2str(max_size - used_space)
        if warning:
            color, verdict = "darkorange", "near to exceed"
        else:
            color, verdict = "red", "exceeded"
        subject_template = " ".join((
            "Space in S3-MDS for service {} is",
            verdict,
            "(used {} from {}, {} free)"
        ))
        subject = subject_template.format(abc_service, used_space_str, max_size_str, free_space_str)
        group_urls = []
        for group_name in group_names:
            group_urls.append(
                """<a href="https://{host}/admin/groups/{group}">{group}</a>""".format(
                    group=group_name, host=common_config.Registry().server.web.address.host
                )
            )
        body = textwrap.dedent("""
        <b>ABC service:</b> {service}<br>
        <b>Sandbox groups:</b> {group_urls}<br>
        <b>S3 bucket:</b> {bucket}<br>
        <b>Bucket max size:</b> {size}<br>
        <b>Current used space in bucket:</b> {used}<br>
        <font color="{color}"><b>Free space in bucket:</b> {free}</font><br>
        More info <a href="https://docs.yandex-team.ru/sandbox/resources#default-notifications">here</a>.<br>
        """).format(
            group_urls=", ".join(group_urls),
            service=abc_service,
            bucket=bucket,
            size=max_size_str,
            used=used_space_str,
            free=free_space_str,
            color=color
        )
        send_to = common_abc.bucket_notification_recipient(abc_service)
        if send_to:
            thread_logger.info("Sending notification to %s with subject: %s", send_to, subject)
            mapping.Notification(
                send_to=list(send_to),
                subject=subject,
                body=body,
                transport=ctn.Transport.EMAIL,
                type=ctn.Type.HTML,
                charset=ctn.Charset.UTF
            ).save(force_insert=True)
            return
        # nobody to notify
        thread_logger.warning(
            "No resource/quota managers or responsible in ABC service %s (groups: %s)",
            abc_service, ", ".join(group_names)
        )

    @set_process_title
    def check_quotas(self, state):
        """
        Target: check used space in buckets of ABC services linked to Sandbox groups and notify about
        buckets used by `USER_BUCKET_QUOTA_WARNING_THRESHOLD` of their max size or more.

        Does nothing if the previous check recorded in the persistent state happened less than
        `CHECK_USER_BUCKET_QUOTA_INTERVAL` seconds ago.

        :param state: target state, time of the check is stored in its `persistent_state`
        :return: tuple of (`None`, empty list, the same `state`)
        """
        own_name = self.check_quotas.__name__
        thread_logger = common_log.MessageAdapter(
            logger.getChild(own_name), fmt="[{}] %(message)s".format(own_name)
        )
        last_check_raw = state.persistent_state.get("last_quotas_check")
        last_check_time = last_check_raw and common_format.str2dt(last_check_raw)
        now = dt.datetime.utcnow()
        if last_check_time:
            next_check_time = last_check_time + dt.timedelta(seconds=self.CHECK_USER_BUCKET_QUOTA_INTERVAL)
            if next_check_time > now:
                return None, [], state
        thread_logger.info("Checking quotas in user buckets")
        total_exceeded = 0
        total_warnings = 0
        groups_in_abc_service = collections.defaultdict(set)
        try:
            groups_query = mapping.Group.objects(
                abc__exists=True,
                read_preference=mapping.ReadPreference.SECONDARY,
            )
            for group_name, abc_service in groups_query.fast_scalar("name", "abc"):
                groups_in_abc_service[abc_service].add(group_name)

            buckets_query = mapping.Bucket.objects(read_preference=mapping.ReadPreference.SECONDARY)
            buckets_ignore_notifications = dict(buckets_query.fast_scalar("name", "ignore_bucket_exhaust"))

            for abc_service, group_names in groups_in_abc_service.items():
                abc_service_id = common_abc.abc_service_id(abc_service)
                bucket, bucket_stats = common_mds.S3.check_bucket(abc_service_id, logger=thread_logger)
                if not bucket:
                    continue
                used_space = bucket_stats["used_space"]
                max_size = bucket_stats["max_size"]
                if not max_size:
                    continue
                if float(used_space) / max_size < self.USER_BUCKET_QUOTA_WARNING_THRESHOLD:
                    continue
                # some space is still left: it is a warning, otherwise the quota is exceeded
                warning = max_size > used_space
                if warning:
                    total_warnings += 1
                else:
                    total_exceeded += 1
                if buckets_ignore_notifications.get(bucket):
                    continue
                self._notify_about_quota(abc_service, sorted(group_names), bucket, bucket_stats, warning, thread_logger)
            state.persistent_state["last_quotas_check"] = common_format.dt2str(dt.datetime.utcnow())
            return None, [], state
        finally:
            thread_logger.info(
                "Sent %s messages of exceeded quota and %s warning about low space left in bucket",
                total_exceeded, total_warnings
            )

    @classmethod
    def _all_buckets(cls):
        """
        Generate names of all listed buckets except `BUCKETS_WITHOUT_GC`, followed by the warehouse bucket.
        """
        client = common_mds.S3().s3_client()
        token = ""
        while True:
            page = client.list_buckets(ContinuationToken=token)
            token = page.get("NextContinuationToken")
            for entry in page.get("Buckets", ()):
                name = entry["Name"]
                if name not in cls.BUCKETS_WITHOUT_GC:
                    yield name
            if token is None:
                # no more pages
                break
        yield ctr.WAREHOUSE_BUCKET

    def _cleanup_bucket_garbage(self, state, deadline, parent_logger_name):
        """
        Collect garbage in the bucket of `state`, continuing from `state.current_key`. Runs inside the fork.

        The following keys not modified during the last `GC_DELAY` hours are treated as garbage and deleted:
        - keys whose first path component is not a number;
        - keys of resources not found in the database for this bucket;
        - files of a multi-file resource which are not listed in its metadata.

        :param state: `GCState` of the bucket, updated in place
        :param deadline: UTC moment to interrupt the process at, `None` means no deadline
        :param parent_logger_name: name of the parent logger, used as a prefix of log records
        :return: dumped state; its `clean` flag is set only if the whole bucket has been processed
        """
        # we want Signaler to reinitialize because we're currently inside the fork
        # note that reset() also works with handlers which is class attribute, so just 'del' isn't enough
        common_statistics.Signaler().reset()
        del common_statistics.Signaler.instance

        mapping.ensure_connection(
            uri=self.sandbox_config.server.mongodb.connection_url,
            max_pool_size=100,
        )

        bucket = state.bucket
        thread_logger = common_log.MessageAdapter(
            logger.getChild("{}_{}".format(parent_logger_name, bucket)),
            fmt="[{}: {}] %(message)s".format(parent_logger_name, bucket)
        )
        thread_logger.info("Started")
        if common_os.Subprocess.current is not None:
            common_os.Subprocess.current.stop_watchdog()
        s3 = common_mds.S3().s3_client()
        garbage_size = 0
        garbage_count = 0
        # NOTE(review): both early returns below skip deletion of the garbage already found on the current page,
        # although it is still added to the removal statistics in `finally` - confirm this is acceptable
        try:
            cont_token = ""
            while cont_token is not None:
                result = s3.list_objects_v2(Bucket=bucket, StartAfter=state.current_key, ContinuationToken=cont_token)
                cont_token = result.get("NextContinuationToken")
                resources = collections.defaultdict(list)  # resource id -> [(key, size), ...] seen on this page
                garbage_keys = set()
                max_modified_time = dt.datetime.utcnow() - dt.timedelta(hours=self.GC_DELAY)
                for item in result.get("Contents", ()):
                    key = item["Key"]
                    size = item["Size"]
                    if key <= state.current_key:
                        # report with the per-bucket logger like the rest of the method does
                        thread_logger.warning(
                            "Current key %s is not greater then previous %s, process interrupted",
                            key, state.current_key
                        )
                        return state.dump()
                    state.checked_count += 1
                    state.checked_size += size
                    state.detect_meta_info(key, size)
                    last_modified = item["LastModified"]
                    if last_modified.replace(tzinfo=None) > max_modified_time:
                        # too fresh to judge
                        continue
                    rid_str, file_uid, filename = (key.split("/") + ["", ""])[:3]
                    if not rid_str.isdigit():
                        garbage_keys.add(key)
                        garbage_size += size
                        garbage_count += 1
                        continue
                    rid = int(rid_str)

                    # executed at most once per key: every path through the body ends with `break` or `return`
                    while rid != state.rid:
                        if state.rid is not None and not state.skip and state.meta_key is not None and state.keys:
                            # keys of the previous resource are complete, compare them with its metadata
                            # noinspection PyBroadException
                            try:
                                # NOTE(review): joining body chunks with a text string assumes they are text
                                # strings too - confirm for the S3 client in use
                                meta_str = "".join(s3.get_object(Bucket=bucket, Key=state.meta_key)["Body"])
                                meta = json.loads(meta_str)
                            except Exception:
                                # report with the per-bucket logger like the rest of the method does
                                thread_logger.exception("Error while loading meta from key %s", state.meta_key)
                                state.skip_resource()
                                break
                            state.meta_size += len(meta_str)
                            meta_keys = {item["key"] for item in meta if item["type"] == "file"}
                            extra_keys = state.keys - meta_keys
                            if extra_keys:
                                garbage_size += sum(state.sizes[k] for k in extra_keys)
                                garbage_count += len(extra_keys)
                                garbage_keys.update(extra_keys)
                        state.next_resource(rid)
                        if self.stop_event.is_set() or (deadline is not None and dt.datetime.utcnow() >= deadline):
                            return state.dump()
                        break

                    if file_uid:
                        if filename:  # multi file resource
                            state.keys.add(key)
                            state.sizes[key] = size
                        elif state.meta_key is None:  # meta for multi file resource
                            state.meta_key = key
                    else:
                        state.skip_resource()  # skip indexed tarball

                    resources[rid].append((key, size))
                    state.current_key = key

                # everything belonging to resources unknown to the database for this bucket is garbage
                query = dict(id__in=resources.keys())
                if bucket == ctr.WAREHOUSE_BUCKET:
                    query.update(mds__backup_namespace=bucket)
                else:
                    query.update(mds__namespace=bucket)
                existing_rids = set(
                    mapping.Resource.objects(**query).fast_scalar("id")
                )
                for rid, keys in resources.items():
                    if rid in existing_rids:
                        continue
                    for key, size in keys:
                        garbage_keys.add(key)
                        garbage_size += size
                        garbage_count += 1
                for chunk in common_it.chunker(list(garbage_keys), 1000):
                    # noinspection PyUnresolvedReferences
                    try:
                        resp = s3.delete_objects(
                            Bucket=bucket, Delete=dict(Objects=[dict(Key=key) for key in chunk])
                        )
                    except botocore.exceptions.ClientError as ex:
                        thread_logger.error("Error while deleting garbage: %s", ex)
                    else:
                        errors = resp.get("Errors")
                        if errors:
                            thread_logger.error("Error while deleting garbage: %s", errors)
                update_progress(state.progress)

            bucket_stats = common_mds.S3.bucket_stats(bucket)
            self._report_signal(state, bucket_stats)
            state.clean = True
        finally:
            state.removed_count += garbage_count
            state.removed_size += garbage_size
            thread_logger.info(state.format_stat())

        return state.dump()

    @staticmethod
    def _report_signal(state, bucket_stats):
        """
        Push a signal with the bucket statistics and wait on the signaler afterwards.

        :param state: `GCState` to take the bucket name, metadata size and index size from
        :param bucket_stats: dict with `objects_parts_size` and `deleted_objects_size` of the bucket
        """
        moment = dt.datetime.utcnow()
        push = common_statistics.Signaler().push
        signal = dict(
            type=cts.SignalType.MDS_BUCKET_INFO,
            date=moment,
            timestamp=moment,
            bucket=state.bucket,
            meta_info_size=state.meta_size,
            index_size=state.index_size,
            objects_parts_size=bucket_stats["objects_parts_size"],
            deleted_objects_size=bucket_stats["deleted_objects_size"]
        )
        push(signal)
        common_statistics.Signaler().wait()

    def _start_gc_worker(self, workers_pool, gc_state, deadline, logger_name):
        """
        Spawn garbage collection of the bucket of `gc_state` in the workers pool.

        :param workers_pool: pool to spawn the worker in
        :param gc_state: `GCState` to continue the collection from
        :param deadline: moment to interrupt the worker at
        :param logger_name: name of the parent logger passed to the worker
        :return: id of the spawned job as returned by the pool
        """
        delimiter = common_os.PROCESS_TITLE_DELIMITER
        base_title = setproctitle.getproctitle().rsplit(delimiter, 1)[0]
        title = "{}{}{}".format(base_title, delimiter, gc_state.bucket)
        worker_args = (gc_state, deadline, logger_name)
        mapping.disconnect()  # the connection is guaranteed to be restored before workers' execution
        job_id = workers_pool.spawn(self._cleanup_bucket_garbage, worker_args, {}, title=title, watchdog=3)
        # bring back the connection of the current process
        mapping.ensure_connection(
            uri=self.sandbox_config.server.mongodb.connection_url,
            max_pool_size=100,
        )
        return job_id

    def _ensure_gc_workers(self, workers_pool, gc_states, buckets, deadline, thread_logger):
        """
        Drop finished states from `gc_states` and start workers for the next buckets in free pool slots.

        :param workers_pool: pool used to spawn new workers
        :param gc_states: list of garbage collector states, modified in place
        :param buckets: sorted list of bucket names
        :param deadline: passed through to every started worker
        :param thread_logger: logger adapter; the last part of its logger's name is passed to workers
        :return: dict mapping identifiers of the started jobs to their bucket names
        """
        logger_name = thread_logger.logger.name.rsplit(".", 1)[-1]
        started = {}
        highest_bucket = ""
        active_buckets = set()
        # iterate over a snapshot, because finished states are removed from the live list
        for known_state in tuple(gc_states):
            highest_bucket = max(highest_bucket, known_state.bucket)
            if not known_state.clean:
                active_buckets.add(known_state.bucket)
                continue
            gc_states.remove(known_state)

        vacant = self.GC_WORKERS_POOL_SIZE - len(gc_states)
        if vacant <= 0:
            return started

        # continue right after the greatest known bucket, starting over when the end is reached
        first = bisect.bisect_right(buckets, highest_bucket)
        if first == len(buckets):
            first = 0
        last = min(first + vacant, len(buckets))
        counters = ("simple_objects_count", "multipart_objects_count")
        for candidate in buckets[first:last]:
            if candidate in active_buckets:
                continue
            stats = common_mds.S3.bucket_stats(candidate, logger=thread_logger)
            if stats is None:
                thread_logger.warning("No stats for bucket %s, skipping", candidate)
                continue
            fresh_state = GCState(candidate, total_keys=sum(stats.get(counter, 0) for counter in counters))
            gc_states.append(fresh_state)
            new_job = self._start_gc_worker(workers_pool, fresh_state, deadline, logger_name)
            started[new_job] = candidate
        return started

    @set_process_title
    def cleanup_garbage(self, state):
        """
        Run garbage collection workers over the buckets until the deadline or a stop request.

        Unfinished states stored in `state.persistent_state["gc_states"]` are resumed first,
        free pool slots are then filled by `_ensure_gc_workers`. Results of finished workers
        are folded back into `state.persistent_state["gc_states"]` on every iteration.

        :param state: service state; `persistent_state` keeps the per-bucket states between runs,
            `interval` is the time budget of this run in seconds
        :return: tuple `(None, [], state)`
        """
        buckets = sorted(self._all_buckets())
        if not buckets:
            return None, [], state
        logger_name = self.cleanup_garbage.__name__
        thread_logger = common_log.MessageAdapter(
            logger.getChild(logger_name), fmt="[{}] %(message)s".format(logger_name)
        )
        gc_states = [GCState.load(data) for data in state.persistent_state.get("gc_states", [])]
        workers_pool = common_os.SubprocessPool(self.GC_WORKERS_POOL_SIZE, logger=thread_logger)
        deadline = dt.datetime.utcnow() + dt.timedelta(seconds=state.interval)

        def fetch_ready_results():
            # results of all the jobs finished so far, keyed by job identifier
            return {job_id: workers_pool.raw_result(job_id) for job_id in workers_pool.ready_jobs()}

        job_to_bucket = {}
        # resume the buckets left unfinished by the previous run
        for gc_state in gc_states:
            if not gc_state.clean:
                job_id = self._start_gc_worker(workers_pool, gc_state, deadline, logger_name)
                job_to_bucket[job_id] = gc_state.bucket
        running = True
        while running:
            job_to_bucket.update(self._ensure_gc_workers(workers_pool, gc_states, buckets, deadline, thread_logger))
            ready_jobs = {}
            # wait until at least one job is finished or there is nothing left to wait for
            while workers_pool.ready_jobs() or workers_pool.pending_jobs():
                ready_jobs = fetch_ready_results()
                if ready_jobs:
                    break
                time.sleep(1)
            if self.stop_event.is_set() or dt.datetime.utcnow() >= deadline:
                while workers_pool.pending_jobs():
                    time.sleep(1)
                # workers finished during the wait above have results too: fetch them,
                # otherwise their progress would not get into the persistent state
                ready_jobs.update(fetch_ready_results())
                running = False
            elif not ready_jobs:
                # nothing is running and nothing could be started: do not spin without a pause
                time.sleep(1)
            results = {}
            for job_id, result in ready_jobs.items():
                bucket = job_to_bucket.pop(job_id, None)
                results[bucket] = result
            for i, gc_state in enumerate(gc_states):
                result = results.get(gc_state.bucket)
                if result is None:
                    continue
                ready_gc_state, exception = result
                if exception is None:
                    gc_states[i] = GCState.load(ready_gc_state)
                elif running:
                    # restart failed worker
                    thread_logger.warning("Restarting worker for bucket %s due to: %s", gc_state.bucket, exception)
                    job_id = self._start_gc_worker(workers_pool, gc_state, deadline, logger_name)
                    job_to_bucket[job_id] = gc_state.bucket
                else:
                    # the run is over: a restarted worker would be left without anyone waiting for it,
                    # the unfinished state is kept as is and gets resumed by the next run
                    thread_logger.warning("Worker for bucket %s failed: %s", gc_state.bucket, exception)
            state.persistent_state["gc_states"] = [s.dump() for s in gc_states]
        return None, [], state

    @classmethod
    def manual_cleanup(cls, bucket, interval=1):
        """
        Clean up garbage of a single bucket in the current process until the bucket is marked as clean.

        The cleanup is performed in passes, each one is given a deadline `interval` minutes ahead.
        Nothing is done if stats of the bucket are not available.

        :param bucket: S3 bucket name
        :param interval: progress interval in minutes
        """
        mapping.ensure_connection()
        logging.basicConfig(level=logging.INFO)
        bucket_stats = common_mds.S3.bucket_stats(bucket)
        if bucket_stats is None:
            # same handling as in `_ensure_gc_workers`: a bucket without stats is skipped
            logger.warning("No stats for bucket %s, skipping", bucket)
            return
        total_keys = sum(bucket_stats.get(k, 0) for k in ("simple_objects_count", "multipart_objects_count"))
        gc_state = GCState(bucket, total_keys=total_keys)
        cleaner = cls()
        while not gc_state.clean:
            # every pass gets its own deadline; the state object is reused between passes
            deadline = dt.datetime.utcnow() + dt.timedelta(minutes=interval)
            cleaner._cleanup_bucket_garbage(gc_state, deadline, cls.manual_cleanup.__name__)
