import os
import sys
import json
import stat
import time
import errno
import random
import psutil
import shutil
import logging
import smtplib
import textwrap
import datetime as dt
import itertools as it
import functools as ft
import contextlib
import collections
import distutils.util

import gevent
import gevent.lock
import gevent.queue
import gevent.subprocess as sp

# Make `gevent.lock` reachable under the `gevent.coros` name as well, both via `sys.modules`
# (for `import gevent.coros`) and as an attribute of the `gevent` package.
sys.modules["gevent.coros"] = gevent.lock  # TODO: crutch for copier.rpc.client. Remove after SKYDEV-1223, SKYDEV-1237
setattr(gevent, "coros", gevent.lock)

import py
import requests
import aniso8601

import api.copier
import api.procman

from kernel.util import console

from sandbox.common import os as common_os
from sandbox.common import fs as common_fs
from sandbox.common import abc as common_abc
from sandbox.common import log as common_log
from sandbox.common import mds as common_mds
from sandbox.common import rest as common_rest
from sandbox.common import share as common_share
from sandbox.common import config as common_config
from sandbox.common import format as common_format
from sandbox.common import system as common_system
from sandbox.common import context as common_context
from sandbox.common import patterns as common_patterns
from sandbox.common import itertools as common_itertools
from sandbox.common import statistics as common_statistics

import sandbox.common.types.misc as ctm
import sandbox.common.types.task as ctt
import sandbox.common.types.client as ctc
import sandbox.common.types.resource as ctr
import sandbox.common.types.statistics as cts

import sandbox.web.api.v1 as api_v1

import sandbox.agentr.db as adb
import sandbox.agentr.types as atypes
import sandbox.agentr.utils as autils
import sandbox.agentr.errors as aerrors


class Buckets(object):
    class Bucket(common_patterns.Abstract):
        """
        Single storage location: its path and number, disk space figures, ban flag,
        per-resource space locks and a semaphore limiting concurrent locks.
        """
        __slots__ = ("path", "no", "total", "free", "reserve", "banned", "locks", "semaphore", "_db")
        __defs__ = (None, None, 0, 0, 0, False, {}, None, None)

        # https://btrfs.wiki.kernel.org/index.php/FAQ#if_your_device_is_large_.28.3E16GiB.29
        BTRFS_RESERVE = 0.002  # Reserve 0,2% on each bucket. TODO: Should be replaced with package 'btrfs' usage.
        BTRFS_RESERVE_MIN = 300 << 30  # Reserve at least 300Gb on each bucket

        def __init__(self, *args, **kwargs):
            # `hard_limit` is not one of the slots, so it is taken out before the base initializer runs.
            limit = kwargs.pop("hard_limit", None)
            super(Buckets.Bucket, self).__init__(*args, **kwargs)
            if not limit:
                return
            self.semaphore = gevent.lock.Semaphore(limit)

        @property
        def locked(self):
            """ Sum of all the sizes currently registered in the locks table. """
            return sum(size for size in self.locks.itervalues())

        @property
        def available(self):
            """ Free space minus the space which is already locked. """
            return self.free - self.locked

        def __str__(self):
            if self.banned:
                note = "BANNED"
            else:
                note = "({} locked for {!r})".format(common_format.size2str(self.locked), sorted(self.locks.keys()))
            return "{}: {} {} of {}".format(
                self.path,
                common_format.size2str(self.available),
                note,
                common_format.size2str(self.total),
            )

        def resources(self):
            """ Queries identifiers and sizes of the resources recorded for this bucket. """
            sql = 'SELECT "id", "size" FROM "resource" WHERE "bucket" = ?'
            return self._db.query(sql, (self.no,))

        def erase(self):
            """ DANGEROUS!!! Drops local cache database records about this bucket!!! """
            sql = 'DELETE FROM "resource" WHERE bucket = ?'
            return self._db.query(sql, (self.no,))

        def update(self):
            """
            Refresh total and free sizes from the system report.
            Does nothing while there is at least one lock registered. Returns the bucket itself.
            """
            if self.locks:
                return self
            self.total, self.free = common_system.get_disk_space(self.path)
            if self.no is not None:  # Suggest all the buckets are running on BTRFS
                if common_config.Registry().common.installation == ctm.Installation.TEST:
                    floor = 0
                else:
                    floor = self.BTRFS_RESERVE_MIN
                self.reserve = max(int(self.total * self.BTRFS_RESERVE), floor)
                # The reserve is hidden from both figures
                self.total -= self.reserve
                self.free -= self.reserve
            if self.banned:
                self.free = 0
            return self

        def ban(self):
            """ Mark the bucket as banned and report no free space on it. """
            self.banned = True
            self.free = 0

        def unban(self):
            """ Clear the ban flag and refresh space figures. """
            self.banned = False
            self.update()

        def lock(self, resource):
            """ Acquire the bucket's semaphore and register resource's size in the locks table. """
            self.semaphore.acquire()
            rid, size = resource["id"], resource["size"]
            self.locks[rid] = size

        def unlock(self, resource):
            """
            Remove resource's lock, release the semaphore and refresh space figures.
            Returns the bucket itself or `None` if there was no lock registered for the resource.
            """
            try:
                released = self.locks.pop(resource["id"])
            except KeyError:
                return None
            self.semaphore.release()
            self.update()
            if not self.locks:
                return self
            # `update()` is a no-op while other locks are held, so account the released size manually
            self.free -= released
            return self

    # NOTE(review): presumably a tag for the bucket balancing process - its usage is not visible in this part of the file
    PROC_TAG = "bucket_balance"
    # Second element of the pair returned by `check()`: parsed "updated" and "checked" timestamps plus decoded meta
    TS = collections.namedtuple("TS", ("updated", "checked", "meta"))
    # Row type produced by iteration over the `Buckets` object
    RES = collections.namedtuple("RES", ("resource_id", "bucket_id"))

    def __init__(
        self, logger, db, buckets, banned, datadir, max_locks, hard_limit,
        cache_exp_created_hours, cache_exp_fetched_hours
    ):
        """
        :param logger: logger to report to
        :param db: local cache database object
        :param buckets: iterable of bucket paths; position in it becomes the bucket's number
        :param banned: container of bucket numbers which should start banned
        :param datadir: path of the plain (non-bucket) storage
        :param max_locks: preferred maximum of simultaneous locks per bucket (see `__best`)
        :param hard_limit: initial value of each bucket's semaphore
        :param cache_exp_created_hours: cache expiration (hours) for locally created resources
        :param cache_exp_fetched_hours: cache expiration (hours) for fetched resources
        """
        self._logger = logger
        self._db = db
        self._max_locks = max_locks
        self._hard_limit = hard_limit
        self._cache_exp_created_hours = cache_exp_created_hours
        self._cache_exp_fetched_hours = cache_exp_fetched_hours

        numbered = []
        for number, path in enumerate(buckets):
            bucket = self.Bucket(path, number, banned=number in banned, hard_limit=hard_limit, _db=db)
            numbered.append(bucket.update())
        self._buckets = numbered

        self.storage = self.Bucket(datadir, hard_limit=hard_limit)
        self._balancers = {}

    def __getitem__(self, item):
        """ Returns the bucket with the given number or the plain storage for `None`. """
        if item is None:
            return self.storage
        return self._buckets[item]

    def __len__(self):
        """ Amount of numbered buckets; the plain storage is not counted. """
        numbered = self._buckets
        return len(numbered)

    def __iter__(self):
        """ Lazily yields `RES` tuples for every database record which has a bucket assigned. """
        rows = self._db.iquery(
            """
            SELECT "id", "bucket" FROM "resource" WHERE "bucket" IS NOT NULL
            """,
            log=adb.Log.STATEMENT
        )
        return it.imap(self.RES._make, rows)

    def info(self, logger=None):
        """
        Logs per-bucket state followed by the summary line.

        :param logger: logger to use instead of the default one
        :return: the summary bucket as provided by `total`
        """
        summary = self.total
        if not logger:
            logger = self._logger
        lines = ["\t#{}: {}\n".format(bucket.no, bucket) for bucket in self._buckets]
        lines.append("\t" + str(summary))
        logger.info("Current buckets state:\n" + "".join(lines))
        return summary

    def cache_info(self, logger=None):
        """ Logs amount and summary size of the resources known to the local cache database. """
        if not logger:
            logger = self._logger
        row = self._db.query('SELECT COUNT("id"), SUM("size") FROM "resource"', one=True)
        amount = row[0]
        size = row[1] or 0  # the sum may be empty
        logger.info(
            "Local cache has information about %d resources totally for %s",
            amount, common_format.size2str(size)
        )

    @property
    def total(self):
        """
        Refreshes all the buckets and returns a synthetic "Total" bucket accumulating their figures.
        If there are no numbered buckets, the refreshed plain storage is returned instead.
        """
        if not self._buckets:
            return self.storage.update()
        summary = self.Bucket("Total")
        for bucket in self._buckets:
            bucket.update()
            summary.total += bucket.total
            summary.free += max(bucket.free, 0)  # negative free space is not accumulated
            summary.locks.update(bucket.locks)
            summary.reserve += bucket.reserve
        return summary

    def check(self, rid):
        """
        Looks up the database record of the given resource.

        :param rid: resource identifier
        :return: pair of (bucket or plain storage, `TS` tuple) or `(None, None)` if there is no record
        """
        row = self._db.query(
            """SELECT "bucket", "updated", "checked", "meta" FROM "resource" WHERE "id" = ?""",
            (rid,), one=True
        )
        if not row:
            return None, None
        location = self.storage if row[0] is None else self._buckets[row[0]]
        updated = dt.datetime.strptime(row[1], atypes.DT_FMT)
        checked = dt.datetime.strptime(row[2], atypes.DT_FMT)
        return location, self.TS(updated, checked, json.loads(row[3]))

    def touch(self, resource):
        """ Stamps current access time into the resource's meta (in place) and stores the meta in the database. """
        accessed = common_format.utcdt2iso()
        times = resource.setdefault("time", {})
        times["accessed"] = accessed
        params = (json.dumps(resource), resource["id"])
        self._db.query(
            """UPDATE "resource" SET "meta" = ? WHERE "id" = ?""",
            params,
            log=adb.Log.STATEMENT
        )

    def actualize(self, resource, overwrite_meta=True, db=None, touch=True):
        """
        Refreshes the database record of the resource with the given (server) metadata.

        :param resource: resource metadata to store
        :param overwrite_meta: store the metadata as is; otherwise the stored meta is read first and
                               updated with the passed one (nothing is done if there is no stored meta)
        :param db: database object to use instead of the default one
        :param touch: also prolong cache expiration date of the record
        """
        target = self._db if db is None else db
        rid = resource["id"]
        self._logger.debug("Actualizing resource #%r database record.", rid)
        if not overwrite_meta:
            # read meta from the DB and update it from the passed parameter
            stored = target.query_one_col("""SELECT "meta" FROM "resource" WHERE "id" = ?""", (rid,))
            if stored is None:
                return
            stored = json.loads(stored)
            if stored is None:
                return
            stored.update(resource)
            resource = stored

        expires = resource["time"]["expires"]
        if expires:
            expires = aniso8601.parse_datetime(expires).strftime(atypes.DT_FMT)
        prolonged = dt.datetime.utcnow() + dt.timedelta(hours=self._cache_exp_fetched_hours)
        cache_expires = prolonged.strftime(atypes.DT_FMT)

        # also updates (e.g. "status", "expires"): in fact these are duplicates of server meta to provide search
        query_template = """
        UPDATE "resource"
        SET {} "updated" = ?, "expires" = ?, "status" = ?, "meta" = ?
        WHERE "id" = ?
        """.format('"cache_expires" = ?,' if touch else "")

        params = [
            dt.datetime.utcnow().strftime(atypes.DT_FMT),
            expires,
            resource["state"],
            json.dumps(resource),
            rid,
        ]
        if touch:
            # the optional column goes first in the statement, so does its value
            params.insert(0, cache_expires)

        target.query(query_template, tuple(params), log=adb.Log.STATEMENT)

    def record(self, resource, bucket, created_locally):
        """
        Stores (replaces) the database record about the resource.

        :param resource: resource metadata; it is copied, the passed object is not modified at the top level
        :param bucket: bucket the resource is placed to; mandatory if there are numbered buckets
        :param created_locally: selects which cache expiration period is applied
        :raises aerrors.ARException: no bucket given while numbered buckets are configured
        """
        if self._buckets and not bucket:
            raise aerrors.ARException(
                "Unable to save a record about resource #{} without a bucket.".format(resource["id"])
            )
        now = dt.datetime.utcnow().strftime(atypes.DT_FMT)
        resource = resource.copy()
        expires = resource["time"]["expires"]
        if expires:
            expires = aniso8601.parse_datetime(expires).strftime(atypes.DT_FMT)
        if created_locally:
            hours = self._cache_exp_created_hours
        else:
            hours = self._cache_exp_fetched_hours
        cache_expires = (dt.datetime.utcnow() + dt.timedelta(hours=hours)).strftime(atypes.DT_FMT)
        self._logger.debug("Adding resource #%r database record.", resource["id"])
        for key in ("http", "rsync", "sources"):  # Drop unnecessary data
            resource.pop(key, None)
        values = (
            resource["id"], resource["type"], resource["size"], resource["state"],
            now, now, now, expires, cache_expires,
            resource["skynet_id"], bucket and bucket.no, json.dumps(resource)
        )
        self._db.query(
            """
            REPLACE INTO "resource"
              (
                "id", "type", "size", "status",
                "fetched", "updated", "checked", "expires", "cache_expires",
                "skynet_id", "bucket", "meta"
              )
            VALUES
              (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
            log=adb.Log.STATEMENT
        )

    def unlock(self, resource, bucket, fetched=True):
        """
        Releases resource's lock on the bucket and, if the data is in place, records the resource.

        :param resource: resource metadata
        :param bucket: bucket which was locked for the resource; `None` or a bucket without a number
                       stands for the plain storage
        :param fetched: `True` - the resource was fetched, `False` - it was not,
                        `None` - the resource was created locally
        :return: result of `record()` for fetched or locally created resource, `None` otherwise
        """
        created_locally = fetched is None
        bucket = self.storage if (bucket and bucket.no) is None else self._buckets[bucket.no]
        # `Bucket.unlock()` returns `None` if the resource holds no lock on the bucket (for example,
        # the locks table was dropped by `reset()`), so its result must not replace the bucket object:
        # it is still required for logging and for the database record.
        bucket.unlock(resource)
        fetched_str = "created" if created_locally else ("fetched" if fetched else "not fetched")
        self._logger.debug(
            "Unlocked bucket #%r from %s resource #%r at %s",
            bucket.no, fetched_str, resource["id"], bucket
        )
        return self.record(resource, bucket, created_locally) if fetched or created_locally else None

    def __best(self, resource):
        """
        Selects a bucket for the resource among those having more available space than resource's size.
        Buckets holding more resources of the same type are examined first; within a group the one with
        the fewest locks (then the least available space) is taken. A bucket is accepted at this stage only
        if it has less than `max_locks` locks; otherwise the pick from the whole set of fitting buckets
        is returned regardless of its locks amount.

        :raises aerrors.NoSpaceLeft: no bucket has enough available space
        """
        def ranking(candidate):
            return len(candidate.locks), candidate.available

        def pick(candidates):
            return min(candidates, key=ranking) if candidates else None

        usage = self._db.iquery(
            """
            SELECT
              "bucket", COUNT("id") AS "amount"
            FROM "resource"
            WHERE "type" = ?
            GROUP BY "bucket"
            ORDER BY "amount" DESC
            """,
            (resource["type"],)
        )
        size = resource["size"]
        # Group "0" holds every fitting bucket and serves as the last resort
        by_amount = {0: [bucket for bucket in self._buckets if bucket.available > size]}
        for no, amount in usage:
            candidate = self._buckets[no]
            if candidate.available > size:
                by_amount.setdefault(amount, []).append(candidate)

        for amount in sorted(by_amount, reverse=True):
            choice = pick(by_amount[amount])
            if choice and len(choice.locks) < self._max_locks:
                return choice

        fitting = by_amount[0]
        if not fitting:
            raise aerrors.NoSpaceLeft("There's no bucket with at least {} space available.".format(
                common_format.size2str(resource["size"])
            ))
        return pick(fitting)

    def lock(self, resource):
        """
        Chooses a place for the resource (the best bucket or the plain storage if there are no buckets)
        and locks resource's size on it.

        :return: the locked bucket
        :raises aerrors.NoSpaceLeft: not enough space available
        """
        chosen = self.__best(resource) if self._buckets else self.storage
        bucket = chosen.update()
        required = max(resource["size"], 1024)
        # Numbered buckets are checked for space by `__best`, the plain storage is checked here
        if bucket == self.storage and bucket.available < required:
            raise aerrors.NoSpaceLeft("There's no space available at {}".format(bucket))
        rid = resource["id"]
        self._logger.debug("Acquiring bucket's #%r semaphore for resource #%r", bucket.no, rid)
        bucket.lock(resource)
        self._logger.info(
            "Locked bucket #%r for resource #%r at %s. Semaphore left: %s",
            bucket.no, rid, bucket, bucket.semaphore.counter
        )
        return bucket

    def reset(self):
        """
        Drops all the space locks on every bucket (or on the plain storage if there are no buckets)
        and recreates semaphores whose counter differs from the hard limit. Stale state is logged.
        """
        self._logger.info("Resetting buckets set.")
        targets = self._buckets or [self.storage]
        for bucket in targets:
            if bucket.locks:
                self._logger.warning("Detected stale lock on bucket %s", bucket)
            semaphore_is_stale = bucket.semaphore.counter != self._hard_limit
            if semaphore_is_stale:
                self._logger.warning("Detected stale semaphore lock on bucket %s", bucket)
                bucket.semaphore = gevent.lock.Semaphore(self._hard_limit)
            bucket.locks = {}

    def local_resources(self, ids=None):
        """
        Lists resources known to the local cache database.

        :param ids: optional iterable of resource identifiers to limit the selection with
        :return: list of `atypes.LocalResource` built from bucket number, identifier and decoded meta
        """
        if ids is None:
            rows = self._db.query('SELECT "bucket", "id", "meta" FROM "resource"')
        else:
            rows = self._db.query('SELECT "bucket", "id", "meta" FROM "resource" WHERE "id" IN (??)', (list(ids),))
        found = []
        for no, rid, meta in rows:
            found.append(atypes.LocalResource(no, rid, json.loads(meta)))
        return found

    def __recursor(self, base, no, level, rec=0):
        """
        Walks numerically named entries of `base` down to the `level` depth and yields
        `(number, no)` pairs for the entries found at that depth. Entries whose name is not
        an integer are skipped. Yields control to other greenlets on every entry.

        :param base: directory to list
        :param no: bucket number to be reported along with each entry
        :param level: depth at which entries are reported
        :param rec: current depth
        """
        for entry in sorted(base.listdir()):
            gevent.sleep()
            try:
                number = int(entry.basename)
            except (TypeError, ValueError):
                continue
            if not (level - rec):
                # Target depth reached
                yield number, no
                continue
            if not rec:
                self._logger.debug("Scanning directory '%d' of bucket #%r", number, no)
            for found in self.__recursor(entry, no, level, rec + 1):
                yield found

    def _scan_bucket(self, bucket):
        """ Scans bucket's directory tree and returns a dictionary of found numbers mapped to bucket's number. """
        no, path = bucket.no, bucket.path
        self._logger.info("Start scanning bucket #%r at %r", no, path)
        found = dict(self.__recursor(py.path.local(path), no, 2))
        self._logger.info("Finished scanning bucket #%r at %r", no, path)
        return found

    def actual_resources(self):
        """
        Scans all the buckets (or the plain storage if there are no buckets) in parallel greenlets
        and merges the results into a single dictionary.
        """
        scanners = [gevent.Greenlet(self._scan_bucket, bucket) for bucket in self._buckets or [self.storage]]
        for scanner in scanners:
            scanner.start()
        merged = dict()
        for scanner in scanners:
            merged.update(scanner.get())
        return merged

    def drop_records(self, ids):
        """ DANGEROUS!!! Drops local cache database records about given resources. """
        statement = 'DELETE FROM "resource" WHERE id IN (??)'
        identifiers = list(ids)
        return self._db.query(statement, (identifiers,))


class Fetcher(object):
    # Name of the task's log directory agent's log mirror.
    LOG_NAME = "agentr.log"
    # The exit code the worker should return in case the `skybone-ctl` tool cannot relocate resource's metadata
    WORKER_SPECIAL_EXIT_CODE = 42
    # Check resource existence every 24 hours
    RECHECK_DATA = 24
    # Update resource metadata each hour
    UPDATE_META = 1
    # Amount of resource records to be dropped from skybone in one request
    SKY_METADATA_DROP_CHUNK = 150
    # Amount of resource symlinks to be dropped from disk in one chunk on mass resource deletion
    SYMLINKS_DROP_CHUNK = 500
    # Timeout to fetch files list in case of fetched resource's skynet ID differs from the database record.
    SKY_FILES_TIMEOUT = 30
    # Send notifications from
    MAIL_FROM = "sandbox-noreply@yandex-team.ru"
    # Send notifications to
    MAIL_TO = "sandbox-warnings@yandex-team.ru"
    # Minimal resource size for download via skynet priority
    SKYNET_MIN_RESOURCE_SIZE = 50 << 20

    # files allocated for disk usage logs
    DISK_USAGE_FILES = {
        ctm.DiskUsageType.FINAL: "disk_usage.yaml",
        ctm.DiskUsageType.PEAK: "peak_disk_usage.yaml"
    }
    # size of a disk usage file, in bytes
    DISK_USAGE_FILE_SIZE = 5 * 1024 ** 2
    # template for arcadia hg cache .hg/hgrc file
    HGRC_TEMPLATE = textwrap.dedent("""
        [paths]
        default = {arcadia_url}

        [ui]
        ssh = ssh -C -i "{arcadia_key}"
    """)

    # noinspection PyPep8Naming
    class __metaclass__(type):
        """ Keeps class-wide locks: a single FS lock and lazily created per-task directory locks. """
        _fs_lock = gevent.lock.RLock()  # Global lock for any FS/user change operation
        _tasks_locks = collections.defaultdict(gevent.lock.RLock)  # Lock for FS operations with tasks directory
        _tasks_locks_counter = collections.Counter()  # Length of queue for every task lock

        @property
        def fs_lock(cls):
            """ The global FS lock object. """
            return cls._fs_lock

        def _tasks_dir_lock(cls, task_id):
            """ Registers one more user of the task's lock and acquires it. """
            cls._tasks_locks_counter[task_id] += 1
            cls._tasks_locks[task_id].acquire()

        def _tasks_dir_unlock(cls, task_id):
            """ Releases the task's lock; the lock is forgotten as soon as its users counter drops to zero. """
            if task_id in cls._tasks_locks:
                task_lock = cls._tasks_locks[task_id]
                cls._tasks_locks_counter[task_id] -= 1
                if not cls._tasks_locks_counter[task_id]:
                    cls._tasks_locks.pop(task_id)
                    cls._tasks_locks_counter.pop(task_id)
                task_lock.release()

    class UserPrivileges(common_os.User.Privileges):
        """
        Privileges context which additionally holds the global FS lock for the whole
        time the base context is entered.
        """

        def __enter__(self):
            Fetcher.fs_lock.acquire()
            try:
                return super(Fetcher.UserPrivileges, self).__enter__()
            except BaseException:
                # `__exit__` is not called if `__enter__` fails, so the lock has to be given back here
                Fetcher.fs_lock.release()
                raise

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Leave the base context first, so the lock covers the whole period it was entered,
            # and release the lock even if leaving the base context fails.
            try:
                super(Fetcher.UserPrivileges, self).__exit__(exc_type, exc_val, exc_tb)
            finally:
                Fetcher.fs_lock.release()

    def __init__(self, tee, logger, task_id, rest, config, users, buckets, fastbone=True):
        """
        Stores the collaborators as attributes, detects whether the client uses the new layout
        (by `NEW_LAYOUT` tag in the client's configuration) and prepares extra keyword arguments
        for skynet downloads: backbone network is forced if `fastbone` is off.
        """
        self.tee, self.logger = tee, logger
        self.task_id = task_id
        self.rest = rest
        self.config = config
        self.users = users
        self.buckets = buckets
        self.new_layout = ctc.Tag.NEW_LAYOUT in config.client.tags
        self.sky_kws = {} if fastbone else {"network": api.copier.Network.Backbone}

    @property
    def data_owner(self):
        """ Login of the user which should own resources' data files """
        if self.new_layout:
            owner = self.users.service
        else:
            owner = self.users.unprivileged
        return owner.login

    @common_patterns.singleton_classproperty
    def worker(self):
        """ Path to the `bin/worker.py` script, resolved relative to this file. """
        this_file = py.path.local(__file__)
        script = this_file.join("..", "bin", "worker.py")
        return script.strpath

    @property
    def _copier_api(self):
        """
        Creates a copier API object and switches skynet's RPC client classes to the gevent-based one.

        :return: freshly created `api.copier.Copier` instance
        """
        # TODO: Waiting for `skynet.api`'s "use_gevent" flag.
        c = api.copier.Copier()
        # The client modules live either under `copier` or under `skybone` package - try both
        try:
            import ya.skynet.services.copier.rpc.client as rc
            import ya.skynet.services.copier.client.transports.rbtorrent as rb
        except ImportError:
            import ya.skynet.services.skybone.rpc.client as rc
            import ya.skynet.services.skybone.client.transports.rbtorrent as rb
        # Replace the client class in both modules with the gevent-aware implementation
        rb.RPCClient = rc.RPCClient = rc.RPCClientGevent
        return c

    @classmethod
    def __worker_call(cls, arguments, logger, check=False, wrc=None, warn_on_fail=False):
        """
        Starts the worker subprocess with the given arguments.

        :param arguments: list of command line arguments for the worker
        :param logger: logger to report to
        :param check: wait for the worker and return the result of its status check
        :param wrc: passed to the status check as is
        :param warn_on_fail: passed to the status check as is
        :return: the process object or, if `check` is set, the status check result
        """
        logger.debug("Starting worker with arguments %r", arguments)
        env = None
        command = [sys.executable, cls.worker]

        if common_system.inside_the_binary():
            # Inside the binary build the executable re-runs itself with another entry point
            env = os.environ.copy()
            env["Y_PYTHON_ENTRY_POINT"] = "sandbox.agentr.bin.worker"
            command = sys.argv

        process = sp.Popen(
            command + arguments,
            preexec_fn=common_os.User.Privileges().__enter__,
            close_fds=True, stdout=sp.PIPE, stderr=sp.PIPE, env=env
        )
        if check:
            return cls.__check_worker_status(process, logger, wrc, warn_on_fail)
        return process

    @staticmethod
    def taskdirs(basedir, task_id, instance):
        # type: (str, int, int) -> Tuple[py.path.local, py.path.local]
        """
        Builds task's working and log directories paths.
        For a missing or non-positive task identifier both paths are the base directory itself.
        """
        base = py.path.local(basedir)
        has_task = task_id is not None and task_id > 0
        if not has_task:
            return base, base
        taskdir = base.join(*ctt.relpath(task_id))
        logdir = taskdir.join("log" + str(instance))
        return taskdir, logdir

    @classmethod
    def ensure_logdir(cls, logger, unprivileged_user, basedir, task_id, instance):
        """
        Runs the "ensure" worker command for the task's log directory and waits for it.

        :return: pair of task directory and log directory paths
        """
        taskdir, logdir = cls.taskdirs(basedir, task_id, instance)
        arguments = ["ensure", unprivileged_user.login, taskdir.strpath, logdir.basename, cls.LOG_NAME]
        worker = cls.__worker_call(arguments, logger=logger)
        logger.info(
            "Pre-creating task's #%s log dir %r at %r. Waiting for worker #%d",
            task_id, logdir, taskdir.strpath, worker.pid
        )
        cls.__check_worker_status(worker, logger)
        return taskdir, logdir

    @classmethod
    def allocate_disk_usage(cls, logger, logdir, unprivileged_user):
        """
        Starts an "allocate" worker for every disk usage file in the log directory,
        then waits for all of them; failures are only warned about.
        """
        size = str(cls.DISK_USAGE_FILE_SIZE)
        # All the workers are started first to let them run simultaneously
        workers = [
            cls.__worker_call(["allocate", unprivileged_user.login, logdir.join(filename).strpath, size], logger)
            for filename in cls.DISK_USAGE_FILES.values()
        ]
        for worker in workers:
            cls.__check_worker_status(worker, logger, warn_on_fail=True)

    def resource_node(self, local_resource):
        """
        Determines the filesystem location of a local resource.

        :param local_resource: triple of (bucket no., resource identifier, resource metadata);
                               on the old layout a false identifier means the "metadata" is a plain path
        :return: `py.path.local` object or `None` if there's no metadata on the old layout
        """
        no, rid, meta = local_resource
        if self.new_layout:
            bucket = self.buckets[no]
            return py.path.local(bucket.path).join(*ctr.relpath(rid))
        if not meta:
            return None
        if not rid:
            return py.path.local(meta)
        base = py.path.local(self.config.client.tasks.data_dir)
        # Convert the relative path parts to a list BEFORE appending the file name:
        # this works for any sequence returned by `relpath()`, not only for a list.
        parts = list(ctt.relpath(meta["task"]["id"])) + [meta["file_name"]]
        return base.join(*parts)

    def __dropper(self, q, dropped_nodes):
        """
        Removal worker: takes local resources from the queue until a false item is received,
        drops each resource's node with the "empty --drop" worker command and reports the node
        to the `dropped_nodes` queue.

        :return: amount of processed nodes
        """
        dropped = 0
        while True:
            item = q.get()
            if not item:
                return dropped
            node = self.resource_node(item)
            if node is not None:
                self.__worker_call(
                    ["empty", "--drop", self.data_owner, str(node)],
                    self.logger, check=True, warn_on_fail=True
                )
                dropped_nodes.put(node)
                gevent.sleep()
                dropped += 1

    def __skybone_notifier(self, q):
        """
        Takes dropped nodes from the queue until a false item is received and notifies skybone
        about them by chunks of up to `SKY_METADATA_DROP_CHUNK` nodes.
        """
        notified = 0
        exhausted = False
        while not exhausted:
            chunk = []
            while len(chunk) < self.SKY_METADATA_DROP_CHUNK:
                node = q.get()
                if not node:
                    exhausted = True
                    break
                chunk.append(node)
                gevent.sleep()

            if chunk:
                self.logger.debug("Notifying skybone about %d dropped resources.", len(chunk))
                command = ["/skynet/tools/skybone-ctl", "notify"] + [str(node) for node in chunk]
                process = sp.Popen(command, close_fds=True, stdout=sp.PIPE, stderr=sp.PIPE)
                self.__check_worker_status(process, self.logger, warn_on_fail=True)
            notified += len(chunk)
        self.logger.info("Totally notified skybone about %d dropped resources.", notified)

    def __symlink_dropper(self, chunk):
        """
        Removes storage entries of the given resources in a forked child process.
        The parent waits for the child and returns the result of `waitpid`;
        the child always terminates with zero exit code.
        """
        self.logger.info("Dropping %r symlinks.", len(chunk))
        child = gevent.os.fork()
        if child:
            self.logger.debug("Waiting for symlinks dropper child process #%r", child)
            return gevent.os.waitpid(child, 0)

        # Child process only below this line
        try:
            console.setProcTitle(atypes.PROCESS_TITLE_PREFIX + " - symlinks dropper")
            common_os.User.Privileges().__enter__()
            storage = py.path.local(self.buckets.storage.path)
            for _, rid, _ in chunk:
                storage.join(*ctr.relpath(rid)).remove(ignore_errors=True)
        finally:
            os._exit(0)

    def dropdirs(self, rids, workers=None):
        """
        DANGEROUS!!!
        This method should not be called in parallel with any resource synchronization method(s).
        It REALLY removes a lot of directories!!!

        :param rids:    an iterable consisting of (bucket no., resource identifier, resource metadata) or
                        (`None`, `None`, file path) in case of usage as trash dropper for CLEANUP task on old layout
        :param workers: Override amount of workers to be used for files removal
        """

        # Work on a private copy: any iterable is accepted and the caller's sequence is not shuffled in place
        rids = list(rids)
        random.shuffle(rids)
        q = gevent.queue.Queue(items=rids)
        dropped_nodes = gevent.queue.Queue()
        threads = [
            gevent.Greenlet(self.__dropper, q, dropped_nodes)
            for _ in xrange(workers or len(self.buckets) * 2 or 3)
        ]
        # One stop marker per removal worker
        for _ in threads:
            q.put(None)
        notifier = gevent.Greenlet(self.__skybone_notifier, dropped_nodes)
        self.logger.debug("Starting to remove %d resource directories with %d workers.", len(rids), len(threads))
        for thread in threads + [notifier]:
            thread.start()
        for chunk in common_itertools.chunker(rids, self.SYMLINKS_DROP_CHUNK):
            self.__symlink_dropper(chunk)
        for thread in threads:
            thread.join()
        self.logger.debug("All workers finished. Waiting for skybone notifier thread.")
        # Stop marker for the notifier - no more nodes will be reported
        dropped_nodes.put(None)
        notifier.join()

    @classmethod
    def dump_disk_usage(cls, logger, mode, taskdir, logdir, unprivileged_user, wait=False):
        """
        Starts the "disk_usage" worker for the task directory if the disk usage file of
        the given mode exists in the log directory.

        :param wait: wait for the worker here (failure is only warned about)
        :return: list with the started worker if it was started and not waited for, empty list otherwise
        """
        target = logdir.join(cls.DISK_USAGE_FILES[mode])
        if not target.isfile():
            return []
        logger.debug("Dumping disk usage: %s", target)
        worker = cls.__worker_call(
            ["disk_usage", unprivileged_user.login, target.strpath, taskdir.strpath],
            logger
        )
        if not wait:
            return [worker]
        cls.__check_worker_status(worker, logger, warn_on_fail=True)
        return []

    @classmethod
    def dump_monitoring_logs(
        cls, logger, taskdir, logdir, monitoring_logs, unprivileged_user, dump_disk_usage=True, peak_dumped=False,
    ):
        """
        Starts "cut_log" workers for every existing monitoring log, optionally dumps the final disk usage
        and removes the peak disk usage file if it was not used, then waits for all the started workers.

        :param monitoring_logs: iterable of objects providing `path`, `offset`, `inode` and `label`
        :param dump_disk_usage: whether to dump the final disk usage
        :param peak_dumped: whether the peak disk usage file has been filled and should be kept
        """
        workers = []

        for log in monitoring_logs:
            source = py.path.local(log.path)
            if not source.exists():
                continue
            logger.debug("dumping system log: %s (offset=%s)", source, log.offset)
            arguments = [
                "cut_log", unprivileged_user.login, logdir.join(log.label).strpath,
                source.strpath, str(log.offset), str(log.inode),
            ]
            workers.append(cls.__worker_call(arguments, logger))

        if not dump_disk_usage:
            logger.debug("dumping disk usage is disabled")
        else:
            workers.extend(cls.dump_disk_usage(logger, ctm.DiskUsageType.FINAL, taskdir, logdir, unprivileged_user))

            peak_file = logdir.join(cls.DISK_USAGE_FILES[ctm.DiskUsageType.PEAK])
            if peak_file.isfile() and not peak_dumped:
                logger.debug("peak disk usage is unused, removing: %s", peak_file)
                with cls.UserPrivileges():
                    peak_file.remove()

        for worker in workers:
            cls.__check_worker_status(worker, logger, warn_on_fail=True)

    @staticmethod
    def process_is_not_running_or_zombie(process):  # type: (psutil.Process) -> bool
        """ Tells whether the process is gone: not running, not found or in zombie state. """
        if process.is_running():
            try:
                # `status` is read as a method inside the binary build and as an attribute otherwise
                status = process.status() if common_system.inside_the_binary() else process.status
            except psutil.NoSuchProcess:
                return True
            return status == psutil.STATUS_ZOMBIE
        return True

    @classmethod
    def stop_process(cls, logger, name, pid):
        """
        Terminates the process with the given PID if its name matches the expected one
        and waits up to 10 seconds for it to go away. Every abnormal situation is only logged.

        :param logger: logger to report to
        :param name: expected process name
        :param pid: process identifier
        """
        try:
            process = psutil.Process(pid)
        except psutil.NoSuchProcess:
            logger.warning("No process with pid=%s found", pid)
            return

        # `name` is read as a method inside the binary build and as an attribute otherwise
        process_name = process.name() if common_system.inside_the_binary() else process.name
        if process_name != name:
            logger.warning("Process with pid %s is not '%s' but '%s', ignoring", pid, name, process_name)
            return

        logger.info("Terminating %s process with pid=%s", name, pid)
        with cls.UserPrivileges():
            try:
                process.terminate()
            except psutil.NoSuchProcess:
                logger.warning("Process %s with pid %s terminated by self or another process", name, pid)
                return

        def gone():
            return cls.process_is_not_running_or_zombie(process)

        ret, slept = common_itertools.progressive_waiter(
            tick=0, max_tick=1, max_wait=10,
            checker=gone,
            sleep_func=gevent.sleep
        )
        if not ret:
            logger.warning("Unable to terminate %s process with pid=%s after %ds", name, pid, slept)

    @property
    def fs_lock(self):
        """
        Context manager which holds the class-wide FS lock and logs acquiring and releasing of it.
        """
        @contextlib.contextmanager
        def logging_lock():
            self.tee.debug("Acquiring FS lock...")
            lock = type(self).fs_lock
            # Acquire outside of `try`: if acquiring fails or is interrupted, the lock
            # is not held and therefore must not be released.
            lock.acquire()
            try:
                yield
            finally:
                lock.release()
                self.tee.debug("Releasing FS lock.")
        return logging_lock()

    def tasks_dir_lock(self, task_id):
        """
        Context manager which holds the class-wide lock of the given task's directory
        and logs acquiring and releasing of it.

        :param task_id: identifier of the task whose directory lock to hold
        """
        @contextlib.contextmanager
        def logging_lock():
            self.tee.debug("Acquiring lock for task #%s directory.", task_id)
            # NOTE(review): acquiring happens inside `try`, so the unlock in `finally` also runs if acquiring fails
            try:
                type(self)._tasks_dir_lock(task_id)
                yield
            finally:
                type(self)._tasks_dir_unlock(task_id)
                self.tee.debug("Releasing lock for task #%s directory", task_id)
        return logging_lock()

    def _skynet_get(self, skyid, size, path):
        """
        Fetches data by skynet ID to the given path.

        :param skyid: skynet ID to fetch
        :param size: data size, passed to the downloader
        :param path: destination path
        :return: `None` if no skynet ID given, `True` on success, `False` on any failure (which is logged)
        """
        if not skyid:
            return None
        try:
            # Ensure copier API patched to use gevent. The property is evaluated for its side effect,
            # so it must not be placed into an `assert` statement, which is dropped in optimized mode.
            if not self._copier_api:
                raise RuntimeError("Copier API is not available")
            common_share.skynet_get(
                skyid, path, None,
                size=size, fallback_to_bb=True, logger=self.logger,
                **self.sky_kws
            )
        except Exception:
            self.logger.exception("Failed to fetch skynet ID %r", skyid)
            return False
        self.logger.info("Skynet ID %r fetched successfully.", skyid)
        return True

    def __fastbonize_url(self, url):
        """
        Converts the URL with skynet copier's `fastbonizeURL` utility.

        :param url: URL to convert
        :return: the utility's result or `None` if anything goes wrong (the error is logged)
        """
        try:
            # This actually a hack to load skynet.copier's egg.
            # In case it will be broken, the code will "silently" fallback to non-fasbonized mode.
            import api.copier
            api.copier.Copier()
            # The utilities module lives either under `copier` or under `skybone` package - try both
            try:
                import ya.skynet.services.copier.client.utils as copier_tools
            except ImportError:
                import ya.skynet.services.skybone.client.utils as copier_tools
            return copier_tools.fastbonizeURL(url)
        except Exception as ex:
            self.logger.error("Unable to load skynet.copier's utility module: %s", str(ex))
            return None

    def _http_get(self, resource, resource_path):
        """
        Downloads the resource to `resource_path` with the "download" worker command.

        :return: stripped output of the worker (path to the downloaded resource) or `None` for
                 an empty single-file resource and for a single-file resource without "executable" flag
        """
        is_empty_file = (
            resource["md5"] == ctr.EMPTY_FILE_MD5 and resource["size"] == 0 and not resource["multifile"]
        )
        if is_empty_file:
            return None
        if resource.get("executable") is None and not resource.get("multifile"):
            return None
        # The sources list is not passed to the worker
        payload = dict(resource)
        payload.pop("sources", None)
        output = self.__worker_call(
            ["download", json.dumps(payload), resource_path], self.logger, check=True,
            wrc=atypes.MDS_DOWNLOAD_NOT_ALLOWED
        )
        if not output:
            return output
        return output.strip()

    def _rsync_get(self, url, size, path):
        if not url:
            return None
        self.logger.info("Fetching data from %r", url)
        cmd = [
            "rsync",
            "-vv",              # set pretty big verbosity level
            "--checksum",       # skip based on checksum, not mod-time & size
            "--recursive",      # recurse into directories
            "--partial",        # keep partially transferred files
            "--links",          # copy symlinks as symlinks
            "--perms",          # preserve permissions
            "--group",          # preserve group
            "--chmod=+w",       # affect file and/or directory permissions
            "--inplace",        # update destination files in-place
            "--bwlimit=75000",  # limit I/O bandwidth; KBytes per second
            "--timeout=" + str(common_share.calculate_timeout(size)),  # set I/O timeout in seconds
            url,
            path + "/"
        ]
        if self.config.this.system.family not in ctm.OSFamily.Group.OSX:
            cmd.append('--contimeout=60')  # set daemon connection timeout in seconds

        env = os.environ.copy()
        env["PATH"] = common_os.path_env(None)
        kwargs = dict(close_fds=True, stdout=sp.PIPE, stderr=sp.STDOUT, env=env)
        if self.config.this.system.family == ctm.OSFamily.CYGWIN:
            # A "golden" crutch for Cygwin - without a shell rsync will report a error like
            # "connection closed unexpectedly" after receiving all the resource data.
            cmd = " ".join(cmd)
            kwargs["shell"] = True
        p = sp.Popen(cmd, **kwargs)
        self.logger.debug("Started subprocess #%d with command %r", p.pid, cmd)
        try:
            for l in p.stdout:
                self.logger.debug("[rsync] " + l.strip())
            self.logger.debug("EOF received.")
            if p.wait():
                self.logger.error("Subprocess finished with exit code %d", p.returncode)
                return False
        except gevent.GreenletExit:
            p.kill()
            p.wait()
            raise
        return True

    def _create_resource(self, files, *args, **kwargs):
        """
        Share the given files via copier API and return the resulting skynet ID.

        :param files: list of files to share
        :param args: extra positional arguments for copier's `createExEx`
        :param kwargs: extra keyword arguments for copier's `createExEx` (e.g. `cwd`)
        :return: skynet ID of the created resource
        """
        # NOTE(review): `TranslateSkyboneErrors` presumably converts copier errors - confirm in `autils`
        with autils.TranslateSkyboneErrors(self.tee, "Unable to create resource"):
            skyid = self._copier_api.createExEx(files, *args, **kwargs).wait().resid()
        return skyid

    def _create_resource_ex(self, files, *args, **kwargs):
        """
        Share the given files via copier API and additionally list the created resource.

        :param files: list of files to share
        :param args: extra positional arguments for copier's `createExEx`
        :param kwargs: extra keyword arguments for copier's `createExEx` (e.g. `cwd`)
        :return: tuple of the skynet ID and the list of entries returned by copier's `list` for that ID
        """
        # NOTE(review): `TranslateSkyboneErrors` presumably converts copier errors - confirm in `autils`
        with autils.TranslateSkyboneErrors(self.tee, "Unable to create resource"):
            copier = self._copier_api
            skyid = copier.createExEx(files, *args, **kwargs).wait().resid()
            listing = list(copier.list(skyid))
        return skyid, listing

    def _reshare(self, resource, inloc):
        ex_skyid = resource["skynet_id"]
        if not ex_skyid:
            return
        infile = inloc.join(py.path.local(resource["file_name"]).basename)
        skyid = self._create_resource([infile.basename], cwd=infile.dirname)
        if skyid == ex_skyid:
            return

        self.tee.warning("Skynet ID %r differs from expected %r. Trying to fix it.", skyid, ex_skyid)
        try:
            files = api.copier.Copier().list(ex_skyid, timeout=self.SKY_FILES_TIMEOUT).files()
        except Exception as ex:
            self.tee.error("Unable to fetch torrent %r metadata: %s", ex_skyid, ex)
            files = []

        for meta in files:
            f = inloc.join(meta["name"])
            if meta["type"] == "file":
                if not f.check(file=1):
                    raise aerrors.InvalidData("File {!r} missing in the resource.".format(meta))
                if meta["executable"]:
                    f.chmod(f.stat().mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
                else:
                    f.chmod(f.stat().mode & ~stat.S_IXUSR & ~stat.S_IXGRP & ~stat.S_IXOTH)
            if meta["type"] == "dir" and not f.check(dir=1):
                raise aerrors.InvalidData("Directory {!r} missing in the resource.".format(meta))
            if meta["type"] == "symlink" and not f.check(link=1):
                raise aerrors.InvalidData("Symlink {!r} missing in the resource.".format(meta))
            gevent.sleep(0)  # yield control to any other thread

        if files:
            self.tee.debug("Trying to share the resource once again.")
            skyid = self._create_resource([infile.basename], cwd=infile.dirname)
            if skyid == ex_skyid:
                self.tee.info("Resource reshared successfully.")
                return
            self.tee.error("Resharing failed one again.")

        self.__worker_call(["empty", self.users.service.login, inloc.strpath], self.logger).wait()

        raise aerrors.InvalidResource(
            "Skynet ID {!r} differs from expected {!r} - file(s) differs".format(skyid, ex_skyid)
            if files else
            "Skynet ID {!r} differs from expected {!r} - resource unavailable".format(skyid, ex_skyid)
        )

    @staticmethod
    def __check_worker_status(p, logger, wrc=None, warn_on_fail=False):
        try:
            stdout, stderr = map(str.strip, p.communicate())
        except gevent.GreenletExit:
            logger.warning("Greenlet is currently stopping. Killing the worker.")
            p.kill()
            raise
        if p.returncode:
            msg = "Worker failed with code {}. {}".format(
                p.returncode, "Output follows:" if stdout or stderr else "No output taken."
            )
            if stderr:
                msg += "\n{hr}STDERR{hr}\n{out}".format(hr="-" * 40, out=stderr)
            if stdout:
                msg += "\n{hr}STDOUT{hr}\n{out}".format(hr="-" * 40, out=stdout)
            if warn_on_fail or (wrc and p.returncode == wrc):
                logger.warning(msg)
                return None
            logger.error(msg)
            raise aerrors.WorkerFailed("Worker #{} failed. See logs for details".format(p.pid))
        elif stderr:
            msg = "Worked #{pid} return code {code}, but not empty stderr: \n{hr}STDOUT{hr}\n{out}".format(
                pid=p.pid,
                code=p.returncode,
                hr="-" * 40,
                out=stderr
            )
            logger.error(msg)
        return stdout

    def _relocate(self, resource, bucket, inloc, filoc, fname, move_skybone=True):
        """
        Finalize freshly fetched resource data and register this host as the resource's source.

        With the old layout the data is moved from `inloc` to `filoc` by the "move" worker under the task
        directory lock. Then a directory resource is handed to the "tidy_up" worker, the data is reshared if
        the move worker exited with the special exit code, a symlink to `filoc` is created in the storage
        for bucket-located data and finally the bucket lock is released.

        :param resource: resource metadata dictionary
        :param bucket: bucket locked for the resource
        :param inloc: `py.path.local` of the directory the data was fetched to
        :param filoc: `py.path.local` of the final directory
        :param fname: resource's file name (relative path)
        :param move_skybone: whether skybone metadata should be moved together with the data
        :return: `atypes.ResourceSyncInfo` describing the relocated resource
        :raises aerrors.InvalidResource: resharing produced an unexpected skynet ID
        """
        need_reshare = False
        if not self.new_layout:
            owner_task_id = resource["task"]["id"]
            # Lock is required to isolate changes in task's working directory because of several resources
            # can be moved to a single task's directory.
            with self.tasks_dir_lock(owner_task_id):
                move_cmd = ["move", "--skybone", str(bool(resource["skynet_id"]) and move_skybone)]
                if self.task_id == owner_task_id:
                    move_cmd += ["--readonly", "no"]
                move_cmd += [self.users.unprivileged.login, inloc.strpath, filoc.strpath, fname]
                mover = self.__worker_call(move_cmd, self.logger)
                self.logger.info(
                    "Relocating %r from %r to %r. Waiting for worker #%d",
                    fname, inloc.strpath, filoc.strpath, mover.pid
                )
                # `None` here means the worker exited with the special (tolerated) exit code
                move_result = self.__check_worker_status(mover, self.logger, self.WORKER_SPECIAL_EXIT_CODE)
                need_reshare = move_result is None

        final_name = fname.split("/")[-1] if self.new_layout else fname
        fifile = filoc.join(final_name)
        if fifile.check(dir=1):
            tidier = self.__worker_call(["tidy_up", self.data_owner, fifile.strpath], self.logger)
            if tidier:
                self.logger.info(
                    "Recursively taking ownership on '%s'. Waiting for worker #%d", fifile, tidier.pid
                )
                self.__check_worker_status(tidier, self.logger)

        if need_reshare:
            self.tee.info("Failed relocating resource's skybone metadata. Resharing it.")
            skyid = self._create_resource([fifile.basename], cwd=fifile.dirname)
            expected_skyid = resource["skynet_id"]
            if skyid != expected_skyid:
                raise aerrors.InvalidResource(
                    "Skynet ID {!r} differs from expected {!r}".format(skyid, expected_skyid)
                )

        if bucket.no is not None:
            sympath = py.path.local(self.buckets.storage.path).join(*ctr.relpath(resource["id"]))
            self.tee.info("Creating resource symlink '%s' -> '%s'", sympath, filoc)
            with self.fs_lock:
                sympath.dirpath().ensure(dir=1)
            # Replace a stale link, if any
            if sympath.check(link=1):
                sympath.remove()
            with self.fs_lock:
                sympath.mksymlinkto(filoc)

        this_host = {"host": self.config.this.id}
        self.rest.resource[resource["id"]].source(this_host)
        self.buckets.unlock(resource, bucket, True)
        return atypes.ResourceSyncInfo(resource, fifile.strpath, True, resource["size"])

    def __send_mail(self, logger, hostname, subject, text):
        """
        Send a plain text e-mail from `self.MAIL_FROM` to `self.MAIL_TO` via SMTP server on localhost.

        :param logger: logger to report the successful delivery to
        :param hostname: host identifier, placed in square brackets at the beginning of the subject
        :param subject: message subject
        :param text: message body
        """
        headers = "From: {}\r\nTo: {}\r\nSubject: [{}] {}\r\n\r\n".format(
            self.MAIL_FROM, self.MAIL_TO, hostname, subject
        )
        message = headers + text
        mail_server = smtplib.SMTP("localhost")
        try:
            mail_server.sendmail(self.MAIL_FROM, self.MAIL_TO, message)
            mail_server.quit()
        finally:
            # Do not leak the connection if sending fails; `close()` is harmless after `quit()`
            mail_server.close()
        logger.info("Email %r has been sent to %r", subject, self.MAIL_TO)

    def __select_bucket(self, resource):
        """
        Lock a storage bucket for the resource and ensure the resource's directory exists on it.

        A bucket on which the directory cannot be created is unlocked and, unless it is already banned,
        gets banned with an e-mail notification; then the next bucket is tried. At most
        `len(self.buckets)` (but at least one) attempts are made.

        :param resource: resource metadata dictionary; `resource["id"]` defines the directory's path
        :return: tuple of the locked bucket and `py.path.local` of the resource's directory on it
        :raises aerrors.NoSpaceLeft: none of the attempts succeeded
        """
        for _ in xrange(len(self.buckets) or 1):
            bucket = self.buckets.lock(resource)
            self.tee.debug("Selected bucket #%r as the resource storage.", bucket.no)
            inloc = py.path.local(bucket.path).join(*ctr.relpath(resource["id"]))
            self.logger.debug("Ensure directory %r existance.", inloc.strpath)
            try:
                with self.fs_lock:
                    inloc.ensure(dir=1)
                return bucket, inloc
            except py.error.Error as ex:
                self.buckets.unlock(resource, bucket, fetched=False)
                if bucket.banned:
                    continue
                self.tee.error("Banning broken bucket #%r: %s", bucket.no, ex)
                try:
                    bucket.update()
                    # A broken device may report zero total size - avoid division by zero
                    left = int(bucket.free * 100 // bucket.total) if bucket.total else 0
                    # Dedent the template BEFORE substituting values, so a multi-line error text
                    # cannot break the dedent
                    body = textwrap.dedent("""
                    Broken bucket #{} banned because of error: {}
                    Space left on device: {} of {} ({:d}%)
                    """).format(
                        bucket.no, str(ex),
                        common_format.size2str(bucket.free), common_format.size2str(bucket.total),
                        left
                    )
                    self.__send_mail(
                        self.tee, self.config.this.id,
                        "Broken bucket #{} banned".format(bucket.no),
                        body
                    )
                except Exception:
                    # Notification is best-effort: its failure must neither prevent the ban
                    # nor stop trying the remaining buckets
                    self.logger.exception("Unable to notify about banning of broken bucket #%r", bucket.no)
                bucket.ban()
        raise aerrors.NoSpaceLeft("No usable storage partitions left on the host.")

    def __record_resource_sync(self, resource_id, start_datetime, end_datetime=None):
        """
        Push a `RESOURCE_SYNC` statistics signal for the resource.

        :param resource_id: identifier of the synchronized resource
        :param start_datetime: moment the synchronization started at; also used as signal's date and timestamp
        :param end_datetime: moment the synchronization finished at; current UTC time if not given
        """
        finished = dt.datetime.utcnow() if end_datetime is None else end_datetime
        push_signal = common_statistics.Signaler().push
        push_signal({
            "type": cts.SignalType.RESOURCE_SYNC,
            "date": start_datetime,
            "timestamp": start_datetime,
            "resource_id": resource_id,
            "task_id": self.task_id,
            # duration is reported in whole milliseconds
            "sync_duration": int((finished - start_datetime).total_seconds() * 1000),
            "client_id": common_config.Registry().this.id,
        })

    def __clear_path(self, inloc):
        """
        Run the "empty" worker on the given directory on behalf of the service user.

        :param inloc: `py.path.local` of the directory to be emptied
        """
        target = inloc.strpath
        self.logger.debug("Empty directory %r.", target)
        command = ["empty", self.users.service.login, target]
        self.__worker_call(command, self.logger, check=True)

    def __call__(self, resource_id, restore=False):
        """
        Synchronize (or restore) the resource to this host.

        Resource metadata is taken from the local cache and refreshed from the server if the cache record
        is missing or older than `UPDATE_META` hours. If resource data is already on disk, the resource is
        registered/touched on the server and returned immediately. Otherwise a bucket is selected and the
        data is fetched by the first transport which succeeds:

        1. HTTP (MDS) - only for resources smaller than `SKYNET_MIN_RESOURCE_SIZE` which have "mds" set;
        2. skynet;
        3. HTTP (MDS) followed by resharing;
        4. rsync (up to 4 links) followed by resharing.

        :param resource_id: identifier of the resource to synchronize
        :param restore: allow fetching of a `BROKEN` resource and mark it `READY` after skynet fetch
        :return: `atypes.ResourceSyncInfo`
        :raises aerrors.InvalidResource: resource is not `READY` and cannot be restored
        :raises aerrors.ResourceNotAvailable: resource data cannot be fetched from any source
        """
        self.tee.info("%s resource #%r.", ("Restoring" if restore else "Synchronizing"), resource_id)

        now = dt.datetime.utcnow()
        bucket, cache = self.buckets.check(resource_id)
        resource = cache and cache.meta
        if not cache or cache.updated + dt.timedelta(hours=self.UPDATE_META) < now:
            resource = (self.rest << self.rest.HEADERS({ctm.HTTPHeader.NO_LINKS: "true"})).resource[resource_id][:]
            if cache and self.config.this.id not in resource["sources"]:
                self.tee.warning("Resource #%d is in cache, but server don't known about that!", resource_id)
            if cache:
                self.buckets.actualize(resource)
        self.logger.debug(
            "%r resource %r #%s owned by %r created by task #%s of size %s",
            resource["state"], resource["type"], resource["id"], resource["owner"], resource["task"]["id"],
            common_format.size2str(resource["size"])
        )

        filoc, fipath = None, None
        if not self.new_layout:
            filoc = self.task_dir(resource["task"]["id"])
            fipath = filoc.join(resource["file_name"])
        elif cache:
            filoc = py.path.local(bucket.path).join(*ctr.relpath(resource_id))
            fipath = filoc.join(resource["file_name"].split("/")[-1])

        should_exists = self.config.this.id in resource.get("sources", [])

        need_to_touch_resource = True
        accessed = resource.get("time", {}).get("accessed")
        if accessed:
            accessed = aniso8601.parse_datetime(accessed).replace(tzinfo=None)
            delay = dt.timedelta(seconds=self.config.common.resources.touch_delay)
            # Touch resource only if it has not been used long enough.
            # This prevents contention when updating popular resources.
            need_to_touch_resource = accessed < dt.datetime.utcnow() - delay

        if (cache or should_exists) and fipath and fipath.check():
            if not cache:
                self.buckets.record(resource, None, False)

            touched = False
            if not should_exists:
                self.rest.resource[resource["id"]].source({"host": self.config.this.id})
                touched = True
            elif need_to_touch_resource:
                self.rest.resource[resource_id].touch()  # Just touch it
                touched = True

            if touched:
                self.buckets.touch(resource)  # advance `accessed` timestamp in cache

            self.logger.debug("Resource #%s already exists on this host at '%r'.", resource_id, fipath)
            utcnow = dt.datetime.utcnow()
            self.__record_resource_sync(resource_id, utcnow, utcnow)
            return atypes.ResourceSyncInfo(resource, fipath.strpath, False, resource["size"])

        restore = restore and resource["state"] in (ctr.State.READY, ctr.State.BROKEN)
        if resource["state"] != ctr.State.READY:
            if not restore:
                raise aerrors.InvalidResource("Resource #{} is not ready. Cannot sync it.".format(resource["id"]))
            if not resource["skynet_id"]:
                raise aerrors.InvalidResource("Resource #{} is in {} state and has no skybone sources.".format(
                    resource["id"], resource["state"]
                ))

        if not self.new_layout and cache:
            resource = (self.rest << self.rest.HEADERS({ctm.HTTPHeader.NO_LINKS: "true"})).resource[resource_id][:]

        self.logger.debug(
            "Resource #%s of type '%s' with filename '%s' doesn't exist on this host, download it.",
            resource_id, resource["type"], resource["file_name"],
        )

        sync_start_datetime = dt.datetime.utcnow()
        try:
            bucket, inloc = self.__select_bucket(resource)
            if not filoc:
                filoc = py.path.local(bucket.path).join(*ctr.relpath(resource["id"]))

            # Every transport below is tried inside its own broad handler, so a failure of one transport
            # falls through to the next one. `GreenletExit` is the exception: it means the download is
            # being aborted and must reach the outer handler instead of triggering a fallback.
            if resource["size"] < self.SKYNET_MIN_RESOURCE_SIZE and resource.get("mds"):
                try:
                    if self._http_get(resource, inloc.strpath) is not None:
                        self.logger.info("Resource %s fetched via http.", resource_id)
                        sync_info = self._relocate(
                            resource, bucket, inloc, filoc, resource["file_name"], move_skybone=False
                        )
                        self.__record_resource_sync(resource_id, sync_start_datetime)
                        return sync_info
                    else:
                        self.logger.warning("Can't fetch resource %s via http.", resource_id)
                        self.__clear_path(inloc)
                except gevent.GreenletExit:
                    raise
                except BaseException:  # deliberately as broad as the former bare `except`
                    self.logger.exception("Can't fetch resource %s via http.", resource_id)
                    self.__clear_path(inloc)

            try:
                # NOTE: a local shortcut for empty single-file resources used to live here behind
                # `if False:`; it was unreachable, so everything goes through skynet.
                copied = self._skynet_get(resource["skynet_id"], resource["size"], inloc.strpath)
                if copied:
                    if restore:
                        self.logger.info("Resource #%r restored successfully!", resource_id)
                        self.rest.resource[resource_id] = dict(state=ctr.State.READY)
                    sync_info = self._relocate(resource, bucket, inloc, filoc, resource["file_name"])
                    self.__record_resource_sync(resource_id, sync_start_datetime)
                    return sync_info
            except gevent.GreenletExit:
                raise
            except BaseException:  # deliberately as broad as the former bare `except`
                self.logger.exception("Error on fetching resource %s by skynet", resource_id)

            self.__clear_path(inloc)
            try:
                if self._http_get(resource, inloc.strpath) is not None:
                    self.logger.info("Resource %s fetched via http. Sharing it.", resource_id)
                    self._reshare(resource, inloc)
                    sync_info = self._relocate(resource, bucket, inloc, filoc, resource["file_name"])
                    self.__record_resource_sync(resource_id, sync_start_datetime)
                    return sync_info
            except gevent.GreenletExit:
                raise
            except BaseException:  # deliberately as broad as the former bare `except`
                self.logger.exception("Error fetching resource %s via http", resource_id)

            # download resource with rsync
            attempts = 4
            self.__clear_path(inloc)
            # For every rsync link try its fastbonized variant first (if available), then the link itself
            candidates = it.chain.from_iterable(
                (self.__fastbonize_url(link["url"]), link["url"])
                for link in self.rest.resource[resource_id].data.rsync[:]
            )
            links = [url for url in candidates if url][:attempts]
            self.logger.debug("Trying following data sources: %r", links)
            if any(self._rsync_get(url, resource["size"], inloc.strpath) for url in links):
                self.logger.info("Resource fetched via rsync. Sharing it.")
                self._reshare(resource, inloc)
                sync_info = self._relocate(resource, bucket, inloc, filoc, resource["file_name"])
                self.__record_resource_sync(resource_id, sync_start_datetime)
                return sync_info

            self.__clear_path(inloc)

            raise aerrors.ResourceNotAvailable("Resource #{} is not available at all sources {!r}.".format(
                resource_id, resource["sources"]
            ))

        except gevent.GreenletExit:
            try:
                self.tee.warning("Download aborted. Erasing incomplete data.")
                self.__worker_call(["empty", self.users.service.login, inloc.strpath], self.logger).wait()
            except BaseException:
                # Best-effort cleanup while the greenlet is being killed: nothing can be done on failure
                pass
            self.buckets.unlock(resource, bucket, False)
            raise
        except Exception:
            self.buckets.unlock(resource, bucket, False)
            raise

    def __md5sum(self, path):
        if os.path.isfile(path):
            self.logger.debug("Calculating MD5 sum for %s", path)
            p = sp.Popen(["openssl", "md5", path], stdout=sp.PIPE, stderr=sp.PIPE)
            output = self.__check_worker_status(p, self.logger)
            if " " not in output:
                # expected line 'MD5(/tmp/foo.txt)= d3fe3ba0dde8c94e11b4bf309da164e1'
                raise aerrors.WorkerFailed("Unexpected 'openssl md5' output: {}".format(output))
            return output.split()[-1]
        return ""

    def __mkreslink(self, src, dst):
        """
        This method creates a symlink to ready resource from task working directory, i.e. owned by `sandbox` user.
        """
        with self.UserPrivileges():
            dst.mksymlinkto(src)
            # Due to OS limitations, under OS X we have a nameless group with only gid available
            dst.chown(self.users.unprivileged.uid, self.users.unprivileged.gid)

    def complete(self, resource, broken=False):
        fetched = False
        rid = resource.meta["id"]
        relpath = resource.meta["file_name"]
        pack_tar = resource.attrs.pack_tar
        infile = resource.data.join(relpath)
        inloc, fname = infile.dirpath(), infile.basename
        self.tee.info("Completing resource #%s at path %r", rid, infile.strpath)

        bucket = None
        files_count = 0
        try:
            if not broken:
                try:
                    title = "[check_resource_data #{} for task #{}]".format(rid, self.task_id)
                    with common_os.Subprocess(title, logger=self.logger, using_gevent=True):
                        common_fs.check_resource_data(infile.strpath)
                except Exception as ex:
                    raise aerrors.InvalidResource(
                        "check_resource_data #{} for task #{} subprocess failed: {}".format(rid, self.task_id, ex)
                    )
            skynet_id = None
            exists = inloc.check(exists=1)
            if not exists and not broken:
                raise aerrors.InvalidData("Resource #{} data does not exist at '{}'".format(rid, infile))
            if not exists:
                self.rest.resource[rid] = {"state": ctr.State.BROKEN}
                self.buckets.drop_records([rid])
                return

            if pack_tar is None:
                pack_tar = 0

            try:
                pack_tar = int(pack_tar)
            except (TypeError, ValueError):
                pack_tar = int(distutils.util.strtobool(pack_tar))

            if pack_tar:
                archive_ext = common_mds.compression.base.CompressionType.val2str(pack_tar).lower()
                self.logger.info("Pack %s to %s", infile.strpath, archive_ext)
                relpath = ".".join((relpath, archive_ext))
                old_path = infile.strpath
                infile = resource.data.join(relpath)
                if pack_tar == common_mds.compression.base.CompressionType.TGZ:
                    pack_params = "-czf"
                else:
                    pack_params = "-cf"

                try:
                    sp.check_call(
                        ["tar", pack_params, infile.strpath, "-C", inloc.strpath, fname],
                        preexec_fn=common_os.User.Privileges().__enter__
                    )
                    with common_os.Capabilities(common_os.Capabilities.Cap.Bits.CAP_CHOWN):
                        os.chown(
                            infile.strpath,
                            common_os.User.service_users.unprivileged.uid,
                            common_os.User.service_users.unprivileged.gid
                        )
                    inloc, fname = infile.dirpath(), infile.basename
                    self.logger.info("Packed file: %s", infile.strpath)
                    self.logger.info("Remove resource file: %s", old_path)
                    with common_os.Capabilities(common_os.Capabilities.Cap.Bits.CAP_DAC_OVERRIDE):
                        if os.path.isfile(old_path):
                            os.remove(old_path)
                        else:
                            shutil.rmtree(old_path)
                except sp.CalledProcessError:
                    self.logger.exception("Error on creating tar")
                    raise aerrors.InvalidResource("Error on creating tar archive from file or directory")
                except OSError:
                    self.logger.exception("Error on swapping tar and resource")

            bucket, filoc = self.__select_bucket(resource.meta)
            self.logger.debug("Counting size of resource #%s at %r", rid, infile.strpath)
            resource.meta["size"] = autils.get_disk_usage(infile.strpath, allow_files=True)[1]
            common_worker_args = ["--rm_empty_files"] if resource.service else []
            if self.new_layout:
                fifile = filoc.join(relpath)
                fidir = fifile.dirpath()
                self.logger.info("Relocating resource #%s: '%s'->'%s'. Dropping quota first.", rid, inloc, fifile)
                autils.ProjectQuota(
                    resource.data.strpath,
                    create=False,
                    popen=ft.partial(sp.Popen, preexec_fn=common_os.User.Privileges().__enter__, close_fds=True)
                ).destroy(ignore_errors=True)
                with self.fs_lock:
                    fidir.ensure(dir=1)
                p = self.__worker_call(
                    [
                        "move", "--skybone", "no", "--readonly", "no", "--recursive",
                        self.users.service.login, inloc.strpath, fidir.strpath, fname
                    ] + common_worker_args,
                    self.logger
                )
                self.logger.debug(
                    "Relocating resource #%s (%r) of size %s from '%s' to '%s'. Waiting for worker #%d",
                    rid, fname, common_format.size2str(resource.meta["size"]), inloc, fidir, p.pid
                )
                self.__check_worker_status(p, self.logger)

                sympath = py.path.local(self.buckets.storage.path).join(*ctr.relpath(rid))
                self.tee.info("Creating resource symlinks at %r and %r", infile.strpath, sympath.strpath)
                with self.fs_lock:
                    sympath.dirpath().ensure(dir=1)
                if sympath.check(link=1):
                    sympath.remove()
                with self.fs_lock:
                    sympath.mksymlinkto(filoc)
                self.__mkreslink(fifile, infile)
            elif resource.meta["task"]["id"] != self.task_id:
                taskdir = self.task_dir(resource.meta["task"]["id"])
                fifile = taskdir.join(relpath)
                filoc = fifile.dirpath()
                self.logger.info(
                    "Relocating resource #%s: '%s'->'%s'. Creating target directory '%s' and dropping quota first.",
                    rid, inloc, relpath, filoc
                )
                self.__worker_call(["ensure", self.users.unprivileged.login, filoc.strpath], self.logger, check=True)

                autils.ProjectQuota(
                    inloc.strpath,
                    create=False,
                    popen=ft.partial(sp.Popen, preexec_fn=common_os.User.Privileges().__enter__, close_fds=True)
                ).destroy(ignore_errors=True)
                p = self.__worker_call(
                    [
                        "move", "--skybone", "false", "--recursive",
                        self.users.unprivileged.login, inloc.strpath, filoc.strpath, fname
                    ] + common_worker_args,
                    self.logger
                )
                self.logger.debug(
                    "Relocating resource #%s (%r) of size %s from '%s' to '%s'. Waiting for worker #%d",
                    rid, relpath, common_format.size2str(resource.meta["size"]), inloc.strpath, filoc, p.pid
                )
                self.__check_worker_status(p, self.logger)

                self.tee.info("Creating symlink %r->%r", infile.strpath, fifile.strpath)
                self.__mkreslink(fifile, infile)
                filoc = taskdir
            else:
                filoc, fifile = resource.data, resource.data.join(relpath)
                if not broken:
                    p = self.__worker_call(
                        ["tidy_up", self.users.unprivileged.login, fifile.strpath] + common_worker_args,
                        self.logger
                    )
                    self.logger.info("Recursively taking ownership on '%s'. Waiting for worker #%d", fifile, p.pid)
                    self.__check_worker_status(p, self.logger)

            multifile = os.path.isdir(fifile.strpath)
            compression_type = (
                not multifile and common_mds.compression.base.archive_type(fifile.strpath) or
                common_mds.compression.base.CompressionType.NONE
            )
            mds_key = None
            namespace = None
            md5sum = None
            if not broken:
                if resource.share:
                    # Legacy behaviour support - always share last part of final resource's data path
                    self.logger.debug("Sharing the resource #%s ('%s' at '%s')", rid, fifile.basename, fifile.dirname)
                    if multifile:
                        skynet_id = self._create_resource([fifile.basename], cwd=str(fifile.dirname))
                    else:
                        skynet_id, meta = self._create_resource_ex([fifile.basename], cwd=str(fifile.dirname))
                        md5sum = meta[0]["md5sum"]
                    self.logger.info("Resource #%s skynet ID is %r", rid, skynet_id)
                try:
                    group = common_abc.cached_sandbox_group(resource.meta["owner"], rest_client=self.rest)
                    abc_id = common_abc.sandbox_group_to_abc_id(resource.meta["owner"], rest_client=self.rest)
                    group_bucket = group and (group["mds_quota"] or {}).get("name")
                    namespace, _ = common_mds.S3.check_bucket(bucket=group_bucket)
                    mds_strong_mode = group["mds_strong_mode"] if group else False
                except common_rest.Client.HTTPError as ex:
                    if ex.status != requests.codes.SERVER_ERROR:
                        raise
                    self.logger.exception("Error while converting ABC id to S3 bucket")
                    abc_id, namespace, mds_strong_mode = None, None, False
                sync_upload_to_mds = resource.attrs.sync_upload_to_mds
                if (
                    sync_upload_to_mds is None and namespace is not None and
                    (resource.attrs.backup_task or resource.attrs.ttl == "inf")
                ):
                    sync_upload_to_mds = "False"
                if md5sum is None:
                    md5sum = self.__md5sum(fifile.strpath)
                single_empty_file = md5sum == ctr.EMPTY_FILE_MD5 and resource.meta["size"] == 0 and not multifile
                if (
                    not single_empty_file and sync_upload_to_mds is not None and
                    self.config.common.installation != ctm.Installation.LOCAL
                ):
                    try:
                        sync_upload_to_mds = distutils.util.strtobool(sync_upload_to_mds)
                    except ValueError:
                        self.logger.error(
                            "Wrong value of attribute 'sync_upload_to_mds' of resource #%s: %r",
                            resource.id, sync_upload_to_mds
                        )
                    else:
                        if sync_upload_to_mds:
                            locked = [resource.id]
                        else:
                            locked = MdsUploader.rest.resource.backup[common_config.Registry().this.id](
                                resources=[resource.id]
                            )
                        if locked:
                            mds_key, mds_skynet_id = MdsUploader.upload(
                                resource.id, fifile.strpath, resource.meta["size"], sync_upload_to_mds,
                                skynet_id=skynet_id, namespace=namespace, abc_id=abc_id,
                                compression_type=compression_type, mds_strong_mode=mds_strong_mode
                            )
                            if sync_upload_to_mds and skynet_id != mds_skynet_id:
                                s3 = common_mds.S3()
                                if mds_skynet_id and s3.skyboned_enabled:
                                    s3.skyboned_remove(mds_skynet_id, resource.id, logger=self.logger)
                                raise common_mds.MDS.RBTorrentMismatch(
                                    "MDS returned wrong skynet_id for resource #{}: {} instead of {}".format(
                                        mds_key, mds_skynet_id, skynet_id
                                    )
                                )

            if md5sum is None:
                md5sum = self.__md5sum(fifile.strpath)
            if multifile:
                for _, _, files in os.walk(fifile.strpath):
                    files_count += len(files)
            else:
                files_count = 1

            update = {
                "skynet_id": skynet_id,
                "size": resource.meta["size"],
                "md5": md5sum,
                "state": ctr.State.BROKEN if broken else ctr.State.READY,
                "multifile": multifile,
                "file_name": relpath,
                "executable": (False if multifile or not fifile.exists() else fifile.stat().mode & stat.S_IXUSR),
            }
            if mds_key is not None:
                update["mds"] = dict(api_v1.schemas.resource.MDSUpdate.create(
                    key=mds_key,
                    namespace=namespace,
                ))
            resource.meta.update(update)
            fetched = None
        finally:
            if bucket is not None:
                self.buckets.unlock(resource.meta, bucket, fetched)
        self.rest.resource[rid] = update

        self.__signal_resource_registration(resource, files_count)

    def reshare(self, resource_id):
        """
        Re-share a resource which is already present in the local cache.

        The resource metadata is re-read via the REST client, the local cache record is
        actualized with it and the resource data located in its bucket is passed to `_reshare`.

        :param resource_id: identifier of the resource to re-share
        :return: resource metadata as fetched via `self.rest`
        :raises aerrors.InvalidResource: if there is no local cache record for the resource
        """
        bucket, cached = self.buckets.check(resource_id)
        if not cached:
            message = "No resource #{} record in the local cache.".format(resource_id)
            raise aerrors.InvalidResource(message)

        self.logger.debug("Resharing resource #%d", resource_id)
        # The locally cached record is only used as an existence check, fresh metadata is used further on
        meta = self.rest.resource[resource_id][:]
        self.buckets.actualize(meta)
        data_path = py.path.local(bucket.path).join(*ctr.relpath(resource_id))
        self._reshare(meta, data_path)
        return meta

    @classmethod
    def restore_links(cls, logger, buckets, data_dir):
        """
        Create symlinks `<data_dir>/resources/<relpath>` pointing to resource data in buckets.

        :param logger: logger to report progress to
        :param buckets: iterable of records having `bucket_id` and `resource_id` attributes,
            which is also indexable by bucket identifier
        :param data_dir: root directory where the `resources` symlinks tree lives
        """
        links_root = py.path.local(data_dir).join("resources")
        for record in buckets:
            relpath = ctr.relpath(record.resource_id)
            target = py.path.local(buckets[record.bucket_id].path).join(*relpath)
            link = links_root.join(*relpath)

            try:
                with cls.fs_lock:
                    link.dirpath().ensure(dir=1)
                    link.mksymlinkto(target)
                logger.debug("Created resource symlink '%s' -> '%s'", link, target)
                # Give other greenlets a chance to run between links creation
                gevent.sleep(0)
            except py.error.EEXIST:
                logger.debug("Symlink '%s' already exists!", link)

    @classmethod
    def erase_bucket(cls, logger, bucket, data_dir):
        """
        Drop all cache records of the bucket and remove symlinks of its resources.

        :param logger: logger to report progress to
        :param bucket: bucket object providing `resources()` and `erase()`
        :param data_dir: root directory where the `resources` symlinks tree lives
        :return: list of identifiers of resources which were registered in the bucket
        """
        # Resource identifiers have to be collected before the records are erased
        resource_ids = [record[0] for record in bucket.resources()]
        logger.info("Removing cache records")
        bucket.erase()

        logger.info("Removing symlinks")
        links_root = py.path.local(data_dir).join("resources")
        for resource_id in resource_ids:
            link = links_root.join(*ctr.relpath(resource_id))
            try:
                link.remove()
                gevent.sleep(0)
            except py.error.ENOENT as error:
                logger.warning("Error removing '%s': %s", link, error)

        return resource_ids

    @classmethod
    def _setup_hgrc(cls, cache_path):
        """
        Write `.hg/hgrc` of the hg cache rendered from `HGRC_TEMPLATE`.

        The template is filled with the arcadia ssh URL and ssh key taken from the
        `client.sdk.hg.arcadia` section of the configuration registry.

        :param cache_path: path object of the hg cache working copy
        """
        registry = common_config.Registry()
        hgrc_path = cache_path.join(".hg/hgrc")
        with hgrc_path.open('w') as hgrc:
            arcadia = registry.client.sdk.hg.arcadia
            content = cls.HGRC_TEMPLATE.format(arcadia_url=arcadia.ssh_url, arcadia_key=arcadia.ssh_key)
            hgrc.write(content)

    @classmethod
    def setup_hg_cache(cls, logger, cache_path, cache_check_path, res_path, users, check_space=None):
        """
        (Re)create hg cache directory and unpack the cache archive into it.

        :param logger: logger to report progress to
        :param cache_path: path object of the hg cache directory (dropped first if it exists)
        :param cache_check_path: path object of the marker file created after successful unpacking
        :param res_path: path object of the tar.gz archive with the cache
        :param users: object whose `service` attribute provides `uid` and `gid` of the new owner
        :param check_space: if set, amount of bytes (plus 100MiB of reserve) which should be available
        :raises aerrors.NoSpaceLeft: if there is not enough free space to unpack the cache
        """
        if cache_path.exists():
            logger.warning("Hg cache path already exists, deleting it")
            # The marker goes first, so partially removed cache cannot be treated as a valid one
            if cache_check_path.exists():
                with cls.UserPrivileges():
                    cache_check_path.remove()
            cls.__worker_call(["empty", "--drop", "nobody", cache_path.strpath], logger, check=True)

        service_user = users.service
        with cls.UserPrivileges():
            cache_path.mkdir()
            cache_path.chown(service_user.uid, service_user.gid)
            cache_path.chmod(0o755)

        if check_space:
            reserve = 100 * 1024 ** 2
            available = common_system.get_disk_space(cache_path.strpath)[1]
            if available < check_space + reserve:
                raise aerrors.NoSpaceLeft(
                    "There is no space available to unpack hg cache: {}".format(available)
                )

        untar_cmd = ["tar", "-zxf", res_path.strpath, "-C", cache_path.strpath, "--no-overwrite-dir"]
        logger.info("Unpacking hg cache: %s", untar_cmd)
        untar = sp.Popen(untar_cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        cls.__check_worker_status(untar, logger)

        with cls.UserPrivileges():
            cache_check_path.ensure(file=True)
            cache_check_path.chown(service_user.uid, service_user.gid)
            cache_check_path.chmod(0o664)
        logger.info("Setting up hg cache completed.")

    @classmethod
    def update_hg_cache(cls, logger, hg_env, cache_path, rev="default"):
        """
        Pull fresh changes into the hg cache and update its working copy to the given revision.

        :param logger: logger to report progress to
        :param hg_env: environment variables mapping for `hg` subprocesses
        :param cache_path: path object of the hg cache working copy
        :param rev: revision to update the working copy to
        """
        workdir = cache_path.strpath
        cls._setup_hgrc(cache_path)

        steps = (
            ("Updating hg cache: %s", ["hg", "pull"]),
            ("Setting up hg cache: %s", ["hg", "up", "-C", "-r", rev]),
        )
        for message, command in steps:
            logger.info(message, command)
            proc = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE, env=hg_env, cwd=workdir)
            cls.__check_worker_status(proc, logger)

    @classmethod
    def git_repos_gc(cls, logger, repos_path):
        """
        Run the `git_gc` worker command for every given repository.

        :param logger: logger passed to the worker call
        :param repos_path: iterable of repositories locations
        """
        for repo_path in repos_path:
            worker_args = ["git_gc", repo_path]
            cls.__worker_call(worker_args, logger, check=True, warn_on_fail=True)

    @classmethod
    def complete_registered_logs(cls, session, logs, log_dirs, logger):
        """
        Hard link registered log files into per-type log directories.

        The link is named `<session.id>_<log file basename>`. Logs of a type which is not
        listed in `log_dirs` are reported as an error and skipped.

        :param session: object providing `id` used as the link name prefix
        :param logs: iterable of objects with `type` and `name` (log file path) attributes
        :param log_dirs: mapping of log type to the destination directory
        :param logger: logger to report progress to
        """
        for entry in logs:
            target_dir = log_dirs.get(entry.type)
            if not target_dir:
                logger.error("Unknown log_type %r for %r", entry.type, entry.name)
                continue

            source = py.path.local(entry.name).realpath()
            link_name = "{}_{}".format(session.id, os.path.basename(entry.name))
            link = py.path.local(target_dir).join(link_name)
            logger.debug("Creating hard link %r -> %r", source, link)
            with cls.UserPrivileges():
                link.mklinkto(source)

    @classmethod
    def normalize_resource_path(cls, taskdir, path):
        """
        Convert resource path into a path object and validate its location.

        :param taskdir: path object of the task's working directory
        :param path: resource path, either absolute or relative to `taskdir`
        :return: path object of the resource
        :raises aerrors.InvalidData: if an absolute path is not located under `taskdir`
            or the path is located in (or equals to) the task's `tmp` directory
        """
        if os.path.isabs(path):
            path = py.path.local(path).realpath()
            if not path.relto(taskdir):
                raise aerrors.InvalidData(
                    "Path {!r} is not relative to the current task's directory".format(path.strpath)
                )
        else:
            # NOTE(review): relative paths are joined as is, without the containment check
            # applied to absolute ones - confirm that this is intended
            path = taskdir.join(path)

        tmp_dir = taskdir.join("tmp")
        in_tmp_dir = path == tmp_dir or path.relto(tmp_dir)
        if in_tmp_dir:
            raise aerrors.InvalidData(
                "Path {!r} is located in the task's temporary directory and will be removed "
                "after the task is finished".format(path.strpath)
            )

        return path

    @classmethod
    def task_dir(cls, tid):
        """ Return path object of the task's directory located under `client.tasks.data_dir`. """
        tasks_root = py.path.local(common_config.Registry().client.tasks.data_dir)
        return tasks_root.join(*ctt.relpath(tid))

    @classmethod
    def task_mounts_dir(cls, tid):
        """ Return path object of the task's mount points directory located under `client.tasks.mounts`. """
        mounts_root = py.path.local(common_config.Registry().client.tasks.mounts)
        return mounts_root.join(str(tid))

    @classmethod
    def task_ramdrive_dir(cls, tid):
        """ Return path object of the task's RAM drive directory located under `client.tasks.ramdrive`. """
        ramdrive_root = py.path.local(common_config.Registry().client.tasks.ramdrive)
        return ramdrive_root.join(str(tid))

    @classmethod
    def next_task_mount_point(cls, tid, name=None):
        """
        Return path object of a mount point inside the task's mounts directory.

        The mounts directory is created if it does not exist yet.

        :param tid: task identifier
        :param name: explicit mount point name; if omitted, the name is the greatest
            numeric entry name of the directory plus one (`0` for the first one)
        """
        mounts_dir = cls.task_mounts_dir(tid)
        mounts_dir.ensure(dir=1)
        if name is not None:
            return mounts_dir.join(str(name))

        taken = [int(entry) for entry in os.listdir(str(mounts_dir)) if entry.isdigit()]
        auto_name = max(taken) + 1 if taken else 0
        return mounts_dir.join(str(auto_name))

    @classmethod
    def mount_image(cls, logger, src, dst, lxc):
        """
        Mount the given image to the path specified via the `mount_image` worker command.

        :param logger: logger to report progress to
        :param src: image location
        :param dst: mount point
        :param lxc: container to pass as `--container`, if any
        :raises aerrors.InvalidImage: if the worker has failed
        """
        logger.info("Mounting %r to directory %r", src, str(dst))
        worker_args = ["mount_image", str(src), str(dst)]
        if lxc:
            worker_args += ["--container", lxc]
        try:
            cls.__worker_call(worker_args, logger, True)
        except aerrors.WorkerFailed as error:
            raise aerrors.InvalidImage(str(error))

    @classmethod
    def mount_bind_tmp(cls, logger, src, dst, rootfs, lxc, devbox):
        """
        Bind `src` to `dst` (mount -obind src dst) via the `mount_bind_tmp` worker command.

        :param logger: logger to report progress to
        :param src: source path
        :param dst: destination path
        :param rootfs: value for `--rootfs`, passed together with the container only
        :param lxc: container to pass as `--container`, if any
        :param devbox: if true, `--devbox` flag is passed to the worker
        :raises aerrors.InvalidBindMount: if the worker has failed
        """
        logger.info("Binding %r to %r", src, dst)
        worker_args = ["mount_bind_tmp", str(src), str(dst)]
        if lxc:
            worker_args += ["--container", lxc, "--rootfs", rootfs]
        if devbox:
            worker_args.append("--devbox")
        try:
            cls.__worker_call(worker_args, logger, True)
        except aerrors.WorkerFailed as error:
            raise aerrors.InvalidBindMount(str(error))

    @classmethod
    def mount_overlay(cls, logger, options, mount_point, lxc):
        """
        Mount an overlay via the `mount_overlay` worker command.

        :param logger: logger to report progress to
        :param options: overlay mount options string
        :param mount_point: where to mount the overlay
        :param lxc: container, always passed as `--container`
        """
        logger.info("Mounting overlay %r with options %r", mount_point, options)
        worker_args = ["mount_overlay"]
        worker_args += ["--mount-point", str(mount_point)]
        worker_args += ["--options", options]
        worker_args += ["--container", lxc]
        cls.__worker_call(worker_args, logger, True)

    @classmethod
    def overlay_options(cls, lower_dirs, upper_dir, work_dir):
        options = ["lowerdir={}".format(":".join(map(str, lower_dirs)))]
        if upper_dir:
            options.append("upperdir={}".format(upper_dir))
        if work_dir:
            options.append("workdir={}".format(work_dir))
        return ",".join(options)

    @classmethod
    def umount(cls, logger, mount, container, container_rootfs, tid):
        """
        Unmounts mount point via the `umount` worker command.

        :param logger: logger to report progress to
        :param mount: mount info (`sandbox.agentr.session.Mount`), its `target`, `type` and `source` are used
        :param container: container to pass as `--container`, if any
        :param container_rootfs: value for `--rootfs`, passed together with the container only
        :param tid: identifier of the task which owns the mount point
        :raises aerrors.WorkerFailed: if unmounting of an overlay has failed
        :raises aerrors.UmountError: if unmounting of any other mount point has failed
        """
        logger.info("Unmounting %r", mount)
        worker_args = ["umount", mount.target]
        if container:
            worker_args += ["--container", str(container), "--rootfs", str(container_rootfs)]

        task_mounts = cls.task_mounts_dir(tid)
        in_task_mounts = task_mounts.exists() and py.path.local(mount.target).relto(task_mounts)
        if in_task_mounts:
            worker_args.append("--remove")
            if len(task_mounts.listdir()) == 1:  # Removing the last auto-generated mount point in directory
                worker_args.append("--remove-parent")
        elif mount.type == ctm.FilesystemType.BIND:
            worker_args.append("--remove")

        is_overlay = mount.type == ctm.FilesystemType.OVERLAY
        try:
            cls.__worker_call(worker_args, logger, True)
        except aerrors.WorkerFailed as error:
            # How can we react here? Schedule container destroy?
            logger.error("A problem occurred during unmounting '%s': %s", mount.target, error)
            if is_overlay:
                raise
            if container:
                raise aerrors.UmountError(lxc=container)
            raise aerrors.UmountError

        if not is_overlay:
            return

        # Overlay source is the mount options string, see `overlay_options`
        overlay_opts = dict(option.split("=", 1) for option in mount.source.split(","))
        workdir = overlay_opts.get("workdir")
        if not workdir:
            return

        chown_args = ["chown", common_os.User.service_users.unprivileged.login, workdir]
        if container:
            chown_args += ["--container", container]
        cls.__worker_call(chown_args, logger, True)

    @staticmethod
    def __signal_resource_registration(resource, files_count):
        """
        Push resource registration signal to the statistics signaler.

        :param resource: object whose `meta` mapping provides `owner`, `id`, `type` and `size`
        :param files_count: amount of files the resource consists of
        """
        # The first purpose tag of the client, except POSTEXECUTE, or "NONE" if there is no such tag
        purpose_tag = next(
            (
                tag
                for tag in common_config.Registry().client.tags
                if tag in ctc.Tag.Group.PURPOSE and tag != ctc.Tag.POSTEXECUTE
            ),
            "NONE"
        )
        meta = resource.meta
        signal = {
            "type": cts.SignalType.RESOURCE_REGISTRATION,
            "timestamp": dt.datetime.utcnow(),
            "owner": meta["owner"],
            "resource_id": meta["id"],
            "resource_type": meta["type"],
            "size": meta["size"],
            "purpose_tag": purpose_tag,
            "files_count": files_count,
            "client_id": common_config.Registry().this.id,
        }
        common_statistics.Signaler().push(signal)


class Quota(object):
    """
    Project quota helper with deferred (background) quota destruction.

    Instantiation does not produce a `Quota` object: `__new__` returns the
    `autils.ProjectQuota` object built for the given path. Quota destruction is deferred:
    `destroy()` only registers the path in the "quota_to_drop" table, the actual job
    is done by the dropper greenlet spawned by `start_dropper()`.
    """
    __db = None
    __logger = None
    __config = None
    __dropper = None

    IDLE_SLEEP = 5  # in seconds

    def __new__(cls, path, create=True):
        # Quota can be created only if the process is able to obtain root privileges
        return cls.__project_quota(path, create and common_os.User.has_root)

    @classmethod
    def __project_quota(cls, path, create):
        """
        Build `autils.ProjectQuota` for the path.

        :param path: directory the quota is bound to
        :param create: passed to `autils.ProjectQuota`; if true, the call additionally waits
            until the path has no pending records in the "quota_to_drop" table
        """
        if create:
            # Do not create a new quota while the previous one for the same path is still to be dropped
            common_itertools.progressive_waiter(
                1, cls.IDLE_SLEEP, float("inf"),
                lambda: not cls.__db.query(
                    """SELECT count() FROM "quota_to_drop" WHERE "path" = ?""",
                    (str(path),),
                    one=True
                )[0],
                sleep_first=False,
                sleep_func=gevent.sleep
            )
        return autils.ProjectQuota(
            str(path),
            create=create,
            popen=ft.partial(sp.Popen, preexec_fn=common_os.User.Privileges().__enter__, close_fds=True)
        )

    @classmethod
    def __dropper_loop(cls):
        """
        Endless loop which destroys quotas registered in the "quota_to_drop" table.

        For every record the quota is destroyed, the path is optionally chown-ed recursively
        and the record is removed. If quota destruction fails with `autils.ProjectQuota.Error`,
        the record is kept, so the attempt is repeated on the next iteration.
        """
        cls.__logger.info("Project quota dropper started")
        while True:
            # noinspection PyBroadException
            try:
                paths = cls.__db.query("""SELECT "path", "project_id", "chown" FROM "quota_to_drop" """)
                if not paths:
                    gevent.sleep(cls.IDLE_SLEEP)
                    continue
                for path, project_id, chown in paths:
                    quota = cls.__project_quota(path, create=False)
                    try:
                        if not quota.project:
                            quota.project = project_id
                        quota.destroy()
                        if chown:
                            p = sp.Popen(
                                ["/bin/chown", "-R", chown, path],
                                preexec_fn=common_os.User.Privileges().__enter__,
                                close_fds=True, stdout=sp.PIPE, stderr=sp.PIPE
                            )
                            # Both output streams are pipes, so they have to be drained: plain `wait()`
                            # blocks forever as soon as the child fills up a pipe buffer
                            p.communicate()
                            if p.returncode:
                                cls.__logger.error("Subprocess finished with exit code %d", p.returncode)
                    except autils.ProjectQuota.Error as ex:
                        cls.__logger.error("Cannot destroy project quota for '%s': %s", path, ex)
                    else:
                        cls.__db.query("""DELETE FROM "quota_to_drop" WHERE "path" = ?""", (path,))
            except (gevent.GreenletExit, gevent.hub.LoopExit):
                cls.__logger.info("Project quota dropper exited")
                raise
            except Exception:
                cls.__logger.exception("Unexpected error in project quota dropper")
                gevent.sleep(cls.IDLE_SLEEP)

    @classmethod
    def start_dropper(cls, db, config, logger):
        """
        Remember database, config and logger and spawn the dropper greenlet.

        The call is a no-op if the dropper is already spawned. The greenlet is not spawned
        on installations which belong to `ctm.Installation.Group.LOCAL`.
        """
        if cls.__dropper is None:
            cls.__db = db
            cls.__logger = logger
            cls.__config = config
            if cls.__config.common.installation not in ctm.Installation.Group.LOCAL:
                cls.__dropper = gevent.spawn(cls.__dropper_loop)

    @classmethod
    def destroy(cls, path, chown=None):
        """
        Schedule quota destruction for the path.

        Nothing is scheduled on local installations or if there is neither a project quota
        on the path nor a `chown` request.

        :param path: directory the quota is bound to
        :param chown: if set, owner specification for recursive `chown` of the path after quota destruction
        """
        quota = cls.__project_quota(path, create=False)
        if cls.__config.common.installation in ctm.Installation.Group.LOCAL or not (quota.project or chown):
            return
        cls.__db.query(
            """INSERT INTO "quota_to_drop" ("path", "project_id", "chown") VALUES (?, ?, ?)""",
            (path, quota.project, chown)
        )

    @classmethod
    def usage(cls, sessions):
        """
        Return list of quota usage values for the given sessions.

        The value is `0` for a session without quota and for any session on local installations.
        """
        with Fetcher.UserPrivileges():
            return [
                s.quota.usage if s.quota and cls.__config.common.installation not in ctm.Installation.Group.LOCAL else 0
                for s in sessions
            ]


# Description of a single resource upload job; fields are listed in positional order
UploadItemInfo = collections.namedtuple(
    "UploadItemInfo",
    [
        "resource_id",
        "path",
        "namespace",
        "mds_key",
        "skynet_id",
        "size",
        "abc_id",
        "compression_type",
    ]
)


class MdsUploader(object):
    """
    Service which uploads resources to MDS by means of a separate uploader subprocess.

    All the state is kept in class attributes, which are filled in by `start()`.
    """
    __logger = None
    __db = None
    __db_factory = None
    __config = None
    rest = None  # REST client, set by `start()`; public, since it is also used from outside of the class
    __broker = None  # greenlet running `__broker_loop`
    __uploader = None  # greenlet running `__loop`
    __buckets = None
    __running = None  # set to `True` by `__broker_loop`, reset to `False` by `__parent_monitor`
    __on_fork = None
    __pipe_rpc = None  # `common_os.PipeRPC` used to talk to the uploader subprocess
    __locks_update_time = None  # `time.time()` of the last successful resource locks update
    __sync_uploads = None  # dict, job identifier -> object which gets `set()` when the job is ready

    IDLE_SLEEP = 1  # in seconds
    STALLED_LOCKS_SLEEP = 5  # in seconds
    FIRST_RETRY_DELAY = 10  # sec
    MAX_RETRY_DELAY = 1800  # sec
    ASYNC_WORKERS = 10  # number of workers actually do async upload
    SYNC_WORKERS = 42  # number of workers actually do sync upload
    STOP_TIMEOUT = 10  # maximum time allowed to stop MdsUploader, in seconds
    SUBPROCESS_ABORTED_RETRIES = 3  # maximum number of retries of SubprocessAborted on synchronous upload

    class SubprocessKilled(Exception):
        """ Raised when an upload job has finished without any result. """

    class LocksUpdateStalled(Exception):
        """ Raised when resource locks were not updated for too long. """

    class RPCServer(common_os.PipeRPCServer):
        """ Pipe RPC server which runs upload jobs in two pools: for asynchronous and for synchronous uploads. """

        def __init__(self, rpc, async_pool, sync_pool):
            """
            :param rpc: RPC object passed to the base class as is
            :param async_pool: pool to run asynchronous upload jobs in
            :param sync_pool: pool to run synchronous upload jobs in
            """
            self.__async_pool = async_pool
            self.__sync_pool = sync_pool
            super(MdsUploader.RPCServer, self).__init__(rpc)

        def ping(self):
            """ Run a no-op job in the asynchronous pool, wait for it and return its result. """
            pool = self.__async_pool
            job_id = pool.spawn(lambda: None, (), {}, title="[ping]")
            while job_id not in pool.ready_jobs():
                gevent.sleep(.1)
            return pool.result(job_id)

        def upload(self, upload_item, sync=False):
            """
            Spawn `MdsUploader.sync_upload` job for the item.

            :param upload_item: `UploadItemInfo` describing the resource to upload
            :param sync: use the pool for synchronous uploads instead of the asynchronous one
            :return: identifier of the spawned job
            """
            size_str = common_format.size2str(upload_item.size)
            if sync:
                mode, pool = "sync", self.__sync_pool
            else:
                mode, pool = "async", self.__async_pool
            job_args = (upload_item.resource_id, upload_item.path, upload_item.size)
            job_kwargs = {
                "skynet_id": upload_item.skynet_id,
                "namespace": upload_item.namespace,
                "abc_id": upload_item.abc_id,
                "compression_type": upload_item.compression_type,
                "recreate_s3_client": True,
            }
            title = "[{} upload resource #{} ({}) to {}]".format(
                mode, upload_item.resource_id, size_str, upload_item.namespace
            )
            return pool.spawn(MdsUploader.sync_upload, job_args, job_kwargs, title=title, watchdog=10)

        def cancel_jobs(self):
            """ Kill all pending jobs of the asynchronous pool. """
            pool = self.__async_pool
            for pending_id in pool.pending_jobs():
                pool.kill(pending_id)

        def ready_jobs(self, sync=False):
            """ Return ready jobs of the pool selected by the `sync` flag. """
            if sync:
                return self.__sync_pool.ready_jobs()
            return self.__async_pool.ready_jobs()

        def result(self, job_id, sync=False):
            """ Return result of the job from the pool selected by the `sync` flag. """
            if sync:
                return self.__sync_pool.result(job_id)
            return self.__async_pool.result(job_id)

    @classmethod
    def __resources_to_upload(cls):
        """ Return "resource" column of all the records of the "upload_to_mds" table. """
        query = """
            SELECT "resource"
            FROM "upload_to_mds"
            """
        return cls.__db.query_col(query, log=None)

    @classmethod
    def __initial_lock_resources(cls):
        """
        Request backup locks for all resources scheduled for upload and drop the records
        of resources which were not returned by the request, then record the locks update time.
        """
        pending = cls.__resources_to_upload()
        if pending:
            backup = cls.rest.resource.backup[common_config.Registry().this.id]
            locked = backup(resources=pending)
            cls.__db.query("""DELETE FROM "upload_to_mds" WHERE "resource" NOT IN (??)""", (locked,))
        cls.__locks_update_time = time.time()

    @classmethod
    def __resource_lock_getter(cls):
        """
        Loop which periodically refreshes resource backup locks and schedules new uploads.

        On every iteration the list of resources scheduled for upload is reported via the REST API;
        the response provides new resources to upload and cancelled ones. For every new resource
        available locally an upload is prepared, then locks for the prepared resources are requested
        and uploads are started for the resources returned by that request.
        Iterations are performed once per `common.resources.backup_interval` while `cls.__running` is set.
        """
        cls.__logger.info("Resource lock getter started")
        new_layout = ctc.Tag.NEW_LAYOUT in cls.__config.client.tags
        # The fetcher is only used to resolve resources' local paths (`resource_node`)
        fetcher = Fetcher(None, None, None, None, cls.__config, None, cls.__buckets)
        while cls.__running:
            start_time = time.time()
            # noinspection PyBroadException
            try:
                resources = cls.__resources_to_upload()
                result = cls.rest.resource.backup[common_config.Registry().this.id].update(resources=resources)
                # This timestamp is examined by `__check_locks_update_time`
                cls.__locks_update_time = time.time()
                new_resources = result["resources"]
                cancelled = result["cancelled"]
                if cancelled:
                    # NOTE(review): the query removes every record EXCEPT the cancelled ones; `IN (??)` looks
                    # more natural here - confirm against the semantics of the "cancelled" field of the API
                    cls.__db.query("""DELETE FROM "upload_to_mds" WHERE "resource" NOT IN (??)""", (cancelled,))
                # Resource identifier -> callable which starts the upload
                uploads = {}
                for res in cls.__buckets.local_resources(ids=new_resources):
                    node = fetcher.resource_node(res)
                    if not node:
                        cls.__logger.warning("Resource #%s has no local path", res.id)
                        continue
                    file_name = res.meta["file_name"]
                    skynet_id = res.meta.get("skynet_id")
                    # With the new layout the node is a directory which holds the resource by its base name
                    path = node.join(file_name.split("/")[-1]) if new_layout else node
                    if not path.exists():
                        cls.__logger.warning("Path %s for resource #%s does not exist", path, res.id)
                        continue
                    group = common_abc.cached_sandbox_group(res.meta["owner"], rest_client=cls.rest)
                    abc_id = common_abc.sandbox_group_to_abc_id(res.meta["owner"], rest_client=cls.rest)
                    bucket = group and (group["mds_quota"] or {}).get("name")
                    namespace, _ = common_mds.S3.check_bucket(bucket=bucket, mds_settings=cls.__config.common.mds)
                    # Archive type is detected for regular files only, directories are treated as not compressed
                    compression_type = (
                        not path.isdir() and common_mds.compression.base.archive_type(str(path)) or
                        common_mds.compression.base.CompressionType.NONE
                    )
                    uploads[res.id] = ft.partial(
                        cls.upload,
                        res.id, str(path), res.meta["size"], False,
                        skynet_id=skynet_id, namespace=namespace, abc_id=abc_id, compression_type=compression_type
                    )

                if uploads:
                    # Only the resources returned by the lock request are actually uploaded
                    locked_resources = cls.rest.resource.backup[common_config.Registry().this.id](
                        resources=list(uploads)
                    )
                    for rid in locked_resources:
                        upload_func = uploads.get(rid)
                        if upload_func is not None:
                            upload_func()

            except (gevent.GreenletExit, gevent.hub.LoopExit):
                cls.__logger.info("Resource lock getter exited")
                raise
            except Exception:
                cls.__logger.exception("Unexpected error in resource lock getter")
            # Sleep for the rest of the interval, taking the time spent on the iteration into account
            elapsed_time = time.time() - start_time
            gevent.sleep(max(cls.__config.common.resources.backup_interval - elapsed_time, 0))

    @classmethod
    def start(cls, logger, db, buckets, config, rest, on_fork):
        """
        Remember the environment and spawn service greenlets of the uploader.

        The call is a no-op if the uploader greenlet already exists. On `LOCAL` installation
        the environment is remembered, but nothing is spawned.

        :param logger: logger to use
        :param db: database object holding the "upload_to_mds" table
        :param buckets: local resources buckets
        :param config: configuration registry
        :param rest: REST API client
        :param on_fork: callback passed to the uploader subprocess context
        """
        if cls.__uploader is not None:
            return

        cls.__logger, cls.__db, cls.__buckets = logger, db, buckets
        cls.__config, cls.rest, cls.__on_fork = config, rest, on_fork
        cls.__sync_uploads = {}
        if cls.__config.common.installation == ctm.Installation.LOCAL:
            return

        cls.__initial_lock_resources()
        cls.__pipe_rpc = common_os.PipeRPC(using_gevent=True)
        cls.__broker = gevent.spawn(cls.__broker_loop)
        cls.__resource_lock_getter_greenlet = gevent.spawn(cls.__resource_lock_getter)
        cls.__uploader = gevent.spawn(cls.__loop)
        cls.__sync_uploads_checker = gevent.spawn(cls.__sync_uploads_checker_loop)

    @classmethod
    def stop(cls, graceful=False):
        """
        Kill the uploader greenlet and, unless `graceful` is set, stop the uploader subprocess.

        The subprocess is asked to stop via the `__stop__` RPC call, which is retried
        while the RPC channel is busy, then the broker greenlet is killed.

        :param graceful: if true, the subprocess and the broker greenlet are left intact
        """
        uploader = cls.__uploader
        if uploader is not None and not uploader.dead:
            uploader.kill()

        if graceful or cls.__broker is None or cls.__broker.dead:
            return

        cls.__logger.info("Stopping MDS uploader subprocess...")
        for _ in common_itertools.progressive_yielder(.1, 3, cls.STOP_TIMEOUT, False):
            try:
                cls.__pipe_rpc("__stop__")
            except common_os.subprocess.PipeRPCBusyError:
                continue
            break
        cls.__broker.kill()

    @classmethod
    def __parent_monitor(cls, pipe):
        """
        Wait for the non-blocking read from `pipe` to return and reset the `cls.__running` flag.

        :param pipe: file descriptor of the read end of the monitoring pipe
        """
        # NOTE(review): presumably the read returns once the write end is closed, i.e. the parent is gone - confirm
        gevent.os.nb_read(pipe, 1)
        cls.__running = False

    @classmethod
    def __sync_uploads_checker_loop(cls):
        """
        Loop which notifies waiters of synchronous uploads about finished jobs.

        Ready jobs of the synchronous pool are polled via RPC; for every ready job registered
        in `cls.__sync_uploads` the registered object is removed from there and gets `set()`.
        The loop sleeps for `IDLE_SLEEP` seconds if nobody was notified or an error has occurred.
        """
        cls.__logger.info("Starting sync uploads checker")
        while cls.__running:
            # noinspection PyBroadException
            try:
                notified = 0
                for job_id in cls.__pipe_rpc("ready_jobs", sync=True) or ():
                    cls.__logger.debug("Upload job %s is ready", job_id)
                    waiter = cls.__sync_uploads.pop(job_id, None)
                    if waiter is not None:
                        waiter.set()
                        notified += 1
                if not notified:
                    gevent.sleep(cls.IDLE_SLEEP)
            except Exception:
                cls.__logger.exception("Unexpected error in sync uploads checker")
                gevent.sleep(cls.IDLE_SLEEP)
        cls.__logger.info("Sync uploads checker finished")

    @classmethod
    def __broker_loop(cls):
        """
        Loop which (re)starts the MDS uploader subprocess while `cls.__running` is set.

        NOTE(review): the body of the `with sp_ctx:` statement is presumably executed in the subprocess
        created by `common_os.Subprocess`, while the code after it is executed in the parent once the
        subprocess is finished - confirm against `common_os.Subprocess` implementation.
        """
        cls.__running = True
        while cls.__running:
            cls.__logger.info("Starting MDS uploader subprocess")
            # The pipe is handed over to `__parent_monitor` (read end) inside the subprocess context
            mon_r, mon_w = os.pipe()
            for fd in (mon_r, mon_w):
                gevent.os.make_nonblocking(fd)
            sp_ctx = common_os.Subprocess(
                "[MDS Uploader]", logger=cls.__logger, using_gevent=True, on_fork=cls.__on_fork, watchdog=3
            )
            try:
                with sp_ctx:
                    # remove existing logger handlers
                    root = logging.getLogger()
                    for handler in root.handlers[:]:
                        root.removeHandler(handler)
                    for log_filter in root.filters[:]:
                        root.removeFilter(log_filter)
                    # setup separate logging
                    log_path = os.path.join(cls.__config.agentr.log.root, "mds_uploader.log")
                    cls.__logger = common_log.setup_log(log_path, "DEBUG")

                    common_os.Subprocess.recursive = False
                    # Only the read end of the monitoring pipe is needed here
                    os.close(mon_w)
                    gevent.spawn(cls.__parent_monitor, mon_r)
                    async_workers_pool = common_os.SubprocessPool(
                        cls.ASYNC_WORKERS, using_gevent=True, logger=cls.__logger,
                        silent_exceptions=(common_mds.S3.TooManyRequests, common_mds.MDS.InsufficientBucketSpace)
                    )
                    sync_workers_pool = common_os.SubprocessPool(
                        cls.SYNC_WORKERS, using_gevent=True, logger=cls.__logger,
                        silent_exceptions=(common_mds.S3.TooManyRequests, common_mds.MDS.InsufficientBucketSpace)
                    )
                    pipe_rpc_server = cls.RPCServer(cls.__pipe_rpc, async_workers_pool, sync_workers_pool)
                    # Make sure the asynchronous pool is able to run jobs before the watchdog is stopped
                    pipe_rpc_server.ping()
                    sp_ctx.stop_watchdog()
                    # Serve RPC requests
                    pipe_rpc_server()
                    cls.__logger.info("MDS uploader subprocess finished")
            except Exception as ex:
                cls.__logger.error("MDS uploader subprocess terminated with error: %r", ex)
            for fd in (mon_r, mon_w):
                os.close(fd)
            # A fresh RPC channel is created for the next subprocess
            cls.__pipe_rpc = common_os.PipeRPC(using_gevent=True)

    @classmethod
    def __clear_mds_upload_record(cls, resource_id):
        """ Remove the record of the resource from the "upload_to_mds" table. """
        delete_query = """DELETE FROM "upload_to_mds" WHERE "resource" = ?"""
        cls.__db.query(delete_query, (resource_id,))

    @classmethod
    def __start_upload(cls, item):
        """
        Start upload of the item in the uploader subprocess if the resource still needs it.

        :param item: `UploadItemInfo` describing the resource to upload
        :return: `None` if the resource state cannot be fetched (any HTTP error except "not found"),
            an empty string if no upload is required - the resource does not exist, is deleted or broken,
            or already has MDS data (the upload record is removed in this case) - and the result
            of the `upload` RPC call otherwise
        """
        resource_id = item.resource_id
        try:
            resource = cls.rest.resource[resource_id].read()
        except common_rest.Client.HTTPError as error:
            if error.status != requests.codes.NOT_FOUND:
                return None
            resource = None

        upload_not_needed = (
            resource is None or
            resource["state"] in [ctr.State.DELETED, ctr.State.BROKEN] or
            resource.get("mds")
        )
        if upload_not_needed:
            cls.__clear_mds_upload_record(resource_id)
            return ""
        return cls.__pipe_rpc("upload", item)

    @classmethod
    def __commit(cls, item, mds_key=None, failed=False):
        """
        Finish processing of the upload item and remove its upload record.

        Unless `failed` is set, MDS data of the resource is registered via the REST API
        and the local cache record is actualized with it.

        :param item: `UploadItemInfo` describing the uploaded resource
        :param mds_key: MDS key of the uploaded data, `item.mds_key` is used if it is not set
        :param failed: if true, the upload record is removed only
        """
        resource_id = item.resource_id
        if not failed:
            mds = {"key": mds_key or item.mds_key, "namespace": item.namespace}
            cls.rest.resource[resource_id].source(mds=mds)
            cache_update = {"id": resource_id, "mds": mds, "multifile": os.path.isdir(item.path)}
            cls.__buckets.actualize(cache_update, overwrite_meta=False, db=cls.__db, touch=False)
        cls.__clear_mds_upload_record(resource_id)

    @classmethod
    def __check_locks_update_time(cls):
        """
        Ensure resource locks were updated recently enough.

        If the last update is older than three backup intervals, jobs are cancelled via
        the `cancel_jobs` RPC call and the exception is raised.

        :raises MdsUploader.LocksUpdateStalled: if resource locks update is stalled
        """
        now = time.time()
        update_lag = now - cls.__locks_update_time
        if update_lag > cls.__config.common.resources.backup_interval * 3:
            cls.__pipe_rpc("cancel_jobs")
            # Exceptions do not interpolate their arguments the way logging calls do,
            # so the message has to be formatted explicitly
            raise cls.LocksUpdateStalled(
                "Resource locks are not updated during last %s seconds, all active jobs are cancelled" % update_lag
            )

    @classmethod
    def __loop(cls):
        """
        Main loop of the background MDS uploader.

        While `cls.__running` is true, the loop:
          * reads pending records from the "upload_to_mds" table (skipping resources with an active job);
          * calls `cls.__commit` for records which already carry an `mds_key` and `cls.__start_upload`
            for the rest, remembering started jobs in `active_jobs`;
          * polls finished jobs via the `ready_jobs` RPC and handles their results one at a time:
            stores the returned key and commits on success, calls `cls.__clear_mds_upload_record`
            on `MDS.FatalError`, or updates the per-namespace retry delay on throttling-like errors.

        `gevent.GreenletExit` and `gevent.hub.LoopExit` are logged and re-raised; any other exception
        is logged and followed by a sleep before the next pass.
        """
        cls.__logger.info("MDS uploader started")
        s3 = common_mds.S3()
        # Subprocess job id -> upload item which the job is processing.
        active_jobs = {}
        # Namespace -> [retry delay, last check time]; the sum of the pair is the earliest moment
        # at which a new upload to the namespace is attempted. A fresh entry is `[0, <now>]`.
        retries = collections.defaultdict(lambda: [0, time.time()])
        while cls.__running:
            # noinspection PyBroadException
            try:
                cls.__check_locks_update_time()
                # Fetch queued uploads, excluding resources that already have an active job.
                resources = cls.__db.query(
                    """
                    SELECT "resource", "path", "namespace", "mds_key", "skynet_id", "size", "abc_id", "pack_tar"
                    FROM "upload_to_mds"
                    WHERE "resource" NOT IN (??)
                    """,
                    ([item.resource_id for item in active_jobs.values()],),
                    log=None,
                )
                ready_jobs = cls.__pipe_rpc("ready_jobs")
                if not resources and not ready_jobs:
                    # Nothing to start and nothing to collect.
                    gevent.sleep(cls.IDLE_SLEEP)
                    continue
                resources_it = iter(resources)
                # `do_next` tells whether the next record should be taken from `resources_it`;
                # when it is false the current `item` is processed again on the next iteration.
                do_next = True
                item = None
                # NOTE: `resources` itself is never modified here, so for a non-empty list this loop
                # is left only through the `break` below (or an exception).
                while ready_jobs or resources:
                    cls.__check_locks_update_time()
                    if do_next and resources:
                        try:
                            item = UploadItemInfo(*next(resources_it))
                        except StopIteration:
                            # All fetched records are consumed; keep looping only to collect ready jobs.
                            item = None
                            if not ready_jobs:
                                break
                        if item is not None:
                            prev_checkpoint = sum(retries[item.namespace])
                            if time.time() < prev_checkpoint:
                                # The namespace is still within its retry delay: skip this record,
                                # yielding control to other greenlets.
                                gevent.sleep()
                                continue
                    if item is not None:
                        if item.mds_key:
                            # The record already has an MDS key, only the commit step is left.
                            cls.__commit(item)
                        else:
                            job_id = cls.__start_upload(item)
                            # NOTE(review): this is logged before `job_id` is checked, so it is also
                            # emitted when no job has actually been started -- confirm this is intended.
                            cls.__logger.debug(
                                "Asynchronously uploading resource #%s in subprocess job %s",
                                item.resource_id, job_id
                            )
                            if job_id:
                                active_jobs[job_id] = item
                                do_next = True
                            else:
                                # A `None` job id makes the same item be tried again on the next iteration;
                                # any other falsy value moves on to the next record.
                                do_next = job_id is not None
                    ready_jobs = cls.__pipe_rpc("ready_jobs")
                    if not ready_jobs:
                        continue
                    # Only the first ready job is handled per iteration.
                    ready_job_id = ready_jobs[0]
                    ready_item = active_jobs.pop(ready_job_id)
                    retry = retries[ready_item.namespace]
                    # noinspection PyBroadException
                    try:
                        try:
                            result = cls.__pipe_rpc("result", ready_job_id)
                            cls.__logger.debug("Got result for subprocess job %s", ready_job_id)
                            if result is None:
                                raise cls.SubprocessKilled("subprocess was killed")
                            mds_key, mds_skynet_id = result
                        except IOError as ex:
                            # A missing file is treated as a fatal error, all other I/O errors propagate as is.
                            if ex.errno == errno.ENOENT:
                                raise common_mds.MDS.FatalError(str(ex))
                            raise
                    except common_mds.MDS.FatalError as ex:
                        # `exc_info` carries the exception type only, without a value and a traceback.
                        cls.__logger.error(
                            "Failed to upload resource #%s (%s) to MDS: %s",
                            ready_item.resource_id, ready_item.path, ex,
                            exc_info=(type(ex), None, None)
                        )
                        # Delete whatever has been uploaded for the resource before the failure.
                        uploaded_keys = list(s3.uploaded_keys(ready_item.namespace, ready_item.resource_id))
                        if uploaded_keys:
                            cls.__logger.warning(
                                "Removing %s uploaded file(s) after failed directory upload", len(uploaded_keys)
                            )
                            common_mds.S3().delete(
                                uploaded_keys, False, namespace=ready_item.namespace, logger=cls.__logger
                            )
                        cls.__clear_mds_upload_record(ready_item.resource_id)
                    except (
                        common_mds.S3.TooManyRequests, common_mds.MDS.InsufficientBucketSpace, cls.SubprocessKilled
                    ) as ex:
                        if isinstance(ex, (common_mds.MDS.InsufficientBucketSpace, cls.SubprocessKilled)):
                            cls.__logger.warning(
                                "Failed to upload resource #%s (%s) to MDS, due to: %s, will try later",
                                ready_item.resource_id, ready_item.path, ex
                            )
                        # Update the namespace retry delay in place and report it only if the checkpoint moved.
                        prev_checkpoint = sum(retry)
                        retry[:] = cls.__get_delay_to_next_retry(*retry)
                        if sum(retry) != prev_checkpoint:
                            cls.__logger.warning(
                                "%s, will retry at %s (after %ss)",
                                ex.message, dt.datetime.fromtimestamp(sum(retry)), retry[0]
                            )
                    except Exception as ex:
                        cls.__logger.exception(
                            "Failed to upload resource #%s (%s) to MDS, will try later: %s",
                            ready_item.resource_id, ready_item.path, ex
                        )
                        gevent.sleep(cls.IDLE_SLEEP)
                    else:
                        # Successful upload: forget the retry state of the namespace.
                        del retries[ready_item.namespace]
                        failed = False
                        if ready_item.skynet_id != mds_skynet_id:
                            # NOTE(review): the first placeholder reads "resource #%s" but receives `mds_key`,
                            # not `ready_item.resource_id` -- confirm which value is meant to be logged.
                            cls.__logger.error(
                                "MDS returned wrong skynet_id for resource #%s: %s instead of %s",
                                mds_key, mds_skynet_id, ready_item.skynet_id,
                                exc_info=(common_mds.MDS.RBTorrentMismatch, None, None)
                            )
                            if mds_skynet_id and s3.skyboned_enabled:
                                s3.skyboned_remove(mds_skynet_id, ready_item.resource_id, logger=cls.__logger)
                                failed = True
                        # Store the key in the queue record before committing the item.
                        cls.__db.query(
                            """UPDATE "upload_to_mds" SET "mds_key" = ? WHERE "resource" = ?""",
                            (mds_key, ready_item.resource_id)
                        )
                        cls.__commit(ready_item, mds_key=mds_key, failed=failed)
            except (gevent.GreenletExit, gevent.hub.LoopExit):
                cls.__logger.info("MDS uploader exited")
                raise
            except cls.LocksUpdateStalled as ex:
                cls.__logger.error(str(ex), exc_info=(type(ex), None, None))
                gevent.sleep(cls.STALLED_LOCKS_SLEEP)
            except Exception:
                cls.__logger.exception("Unexpected error in MDS uploader")
                gevent.sleep(cls.IDLE_SLEEP)

    @classmethod
    def __get_delay_to_next_retry(cls, last_retry_delay, last_check_time=None):
        """
        Calculate the delay before the next retry and the time the delay is counted from.

        :param last_retry_delay: previously used delay in seconds, a false value means "no retries yet"
        :param last_check_time: timestamp the previous delay was counted from, current time if `None`
        :return: tuple `(delay, check_time)`
        """
        current_time = time.time()
        checked_at = current_time if last_check_time is None else last_check_time

        if checked_at + last_retry_delay > current_time:
            # The previous delay has not expired yet: keep it (or start with the initial one) as is.
            return (last_retry_delay or cls.FIRST_RETRY_DELAY), checked_at

        # The previous delay has expired: grow it by the factor of 55/34, limited by the maximum delay.
        if last_retry_delay:
            grown_delay = last_retry_delay * 55 / 34
        else:
            grown_delay = cls.FIRST_RETRY_DELAY
        return min(cls.MAX_RETRY_DELAY, grown_delay), current_time

    @classmethod
    def check_bucket_size(cls, namespace, size, logger=None):
        """
        Verify that an S3 bucket has enough free space to accept an upload of the given size.

        :param namespace: bucket name, `ctr.DEFAULT_S3_BUCKET` is used if it is empty
        :param size: size of the data to upload
        :param logger: logger to pass to the statistics request, the class logger by default
        :raises common_mds.S3.InsufficientBucketSpace: if `size` exceeds the free space of the bucket
        """
        bucket = namespace or ctr.DEFAULT_S3_BUCKET
        stats = common_mds.S3.bucket_stats(bucket, logger=logger or cls.__logger)
        if not stats:
            # No statistics were returned for the bucket, there is nothing to compare with.
            return

        available = stats["max_size"] - stats["used_space"]
        if size > available:
            message = "Bucket {} has insufficient free space {} to upload {}".format(
                bucket, common_format.size2str(available), common_format.size2str(size)
            )
            raise common_mds.S3.InsufficientBucketSpace(message)

    @classmethod
    def upload(
        cls, resource_id, path, size, sync,
        skynet_id=None, namespace=None, abc_id=None, compression_type=0, mds_strong_mode=False
    ):
        """
        Upload a resource to MDS either synchronously or by putting it into the background queue.

        In synchronous mode an upload job is started through the `upload` RPC, the call waits on an event
        registered in `cls.__sync_uploads` and then returns the job result. `S3.TooManyRequests` errors are
        retried forever with a growing delay; `SubprocessAborted` errors are retried up to
        `cls.SUBPROCESS_ABORTED_RETRIES` times.

        In asynchronous mode a record is inserted into the "upload_to_mds" table (an existing record for
        the same resource is left untouched), optionally after checking free space of the bucket.

        :param resource_id: identifier of the resource to upload
        :param path: path to the resource data
        :param size: size of the resource data
        :param sync: perform the upload synchronously if true, otherwise enqueue it
        :param skynet_id: skynet identifier of the resource, if any
        :param namespace: target bucket name
        :param abc_id: ABC service identifier
        :param compression_type: value stored as "pack_tar" and passed to the upload job
        :param mds_strong_mode: check free space of the bucket before enqueueing (asynchronous mode only)
        :return: result of the upload job in synchronous mode, `(None, None)` in asynchronous one
        :raises common_mds.MDS.FatalError: if the job result cannot be obtained because of a missing file
        :raises common_os.SubprocessAborted: if the job subprocess was aborted too many times
        """
        if sync:
            retry_delay = 0
            # Moment the current delay is counted from. It has to be passed back to the delay calculation,
            # otherwise each call looks like the first one within a fresh window and the delay never grows.
            last_retry_time = None
            subprocess_aborted_retries = 0
            while True:
                try:
                    item = UploadItemInfo(resource_id, path, namespace, None, skynet_id, size, abc_id, compression_type)
                    job_id = cls.__pipe_rpc("upload", item, sync=True)
                    cls.__logger.debug("Synchronously uploading resource #%s in subprocess job %s", resource_id, job_id)
                    # presumably the event is set by another greenlet once the job is finished -- TODO confirm
                    ev = cls.__sync_uploads[job_id] = gevent.event.Event()
                    ev.wait()
                    try:
                        result = cls.__pipe_rpc("result", job_id, sync=True)
                        cls.__logger.debug("Got result for subprocess job %s", job_id)
                        return result
                    except IOError as ex:
                        # A missing file cannot be fixed by retrying, report it as a fatal error.
                        if ex.errno == errno.ENOENT:
                            raise common_mds.MDS.FatalError(str(ex))
                        raise
                    except common_os.SubprocessAborted:
                        subprocess_aborted_retries += 1
                        if subprocess_aborted_retries >= cls.SUBPROCESS_ABORTED_RETRIES:
                            raise
                        cls.__logger.warning("Subprocess aborted for job %s, retrying", job_id)
                except common_mds.S3.TooManyRequests as ex:
                    retry_delay, last_retry_time = cls.__get_delay_to_next_retry(retry_delay, last_retry_time)
                    cls.__logger.error("%s, will retry after %ss", ex.message, retry_delay)
                    gevent.sleep(retry_delay)
        else:
            if mds_strong_mode:
                cls.check_bucket_size(namespace, size)
            cls.__db.query(
                """
                INSERT OR IGNORE INTO "upload_to_mds"
                ("resource", "path", "namespace", "skynet_id", "size", "abc_id", "pack_tar")
                VALUES
                (?, ?, ?, ?, ?, ?, ?)
                """,
                (resource_id, path, namespace, skynet_id, size, abc_id, compression_type)
            )
            return None, None

    @classmethod
    def sync_upload(
        cls, resource_id, path, size,
        skynet_id=None, namespace=None, abc_id=None, logger=None,
        compression_type=0, use_subprocess=False, recreate_s3_client=False
    ):
        """
        Upload resource data to an S3 bucket and return the resulting key.

        Free space of the bucket is checked first. Then, depending on the arguments, the data is uploaded
        as a tar (`compression_type` is set), as a directory or as a single file. If `skynet_id` is given
        and skyboned is enabled, the uploaded data is also registered there.

        :param resource_id: identifier of the resource to upload
        :param path: path to the resource data (a file or a directory)
        :param size: size of the resource data, used for the free space check and logging
        :param skynet_id: skynet identifier of the resource, if any
        :param namespace: target bucket name, `ctr.DEFAULT_S3_BUCKET` if `None`
        :param abc_id: ABC service identifier, used for logging only
        :param logger: base logger, the class logger by default; messages are prefixed with the resource id
        :param compression_type: if true, upload the file at `path` via `S3.upload_tar` with this value
        :param use_subprocess: run the upload within a `common_os.Subprocess` context
        :param recreate_s3_client: drop the cached `S3.instance` before uploading
        :return: tuple `(mds_key, mds_skynet_id)`
        :raises common_mds.S3.InsufficientBucketSpace: if the bucket has not enough free space
        """
        if logger is None:
            logger = cls.__logger
        logger = common_log.MessageAdapter(logger, fmt="[upload #{}] %(message)s".format(resource_id))
        if namespace is None:
            namespace = ctr.DEFAULT_S3_BUCKET
        size_str = common_format.size2str(size) if size else size

        cls.check_bucket_size(namespace, size, logger=logger)

        # NOTE(review): presumably `common_os.Subprocess` executes the body of the `with` statement in
        # a child process and hands `result` over to the parent -- the code below relies on `sp_ctx.result`
        # being available after the block. Verify against `common_os.Subprocess`.
        sp_ctx = common_os.Subprocess(
            "[upload resource #{} ({}) to {}]".format(resource_id, size_str, namespace),
            logger=logger, using_gevent=True, watchdog=1,
        ) if use_subprocess else common_context.NullContextmanager()
        with sp_ctx:
            logger.info(
                "Uploading resource #%s of size %s to S3 bucket %s (ABC service #%s)",
                resource_id, size_str, namespace, abc_id
            )
            if common_os.Subprocess.current is not None:
                common_os.Subprocess.current.stop_watchdog()
            if use_subprocess or recreate_s3_client:
                # Drop the cached client, so that the following `S3()` calls get a fresh one.
                del common_mds.S3.instance
            if compression_type:
                # The payload is arbitrary binary data, so the file must not be opened in text mode.
                with open(path, "rb") as f:
                    mds_name = os.path.basename(path)
                    mds_key, metadata = common_mds.S3().upload_tar(
                        f, mds_name, namespace=namespace, resource_id=resource_id, size=os.path.getsize(path),
                        compression_type=compression_type, logger=logger, executable=os.access(path, os.X_OK)
                    )
            elif os.path.isdir(path):
                mds_key, metadata = common_mds.S3().upload_directory(
                    path, namespace=namespace, resource_id=resource_id, logger=logger, tar_dir=True
                )
            else:
                mds_name = os.path.basename(path)
                mds_key, metadata = common_mds.S3().upload_file(
                    path, mds_name, namespace=namespace, resource_id=resource_id, logger=logger
                )
            s3 = common_mds.S3()
            if skynet_id and s3.skyboned_enabled:
                mds_skynet_id = s3.skyboned_add(metadata, resource_id, namespace=namespace, logger=logger)
            else:
                # Nothing is registered in skyboned, report the identifier passed by the caller.
                mds_skynet_id = skynet_id
            if use_subprocess:
                sp_ctx.result = mds_key, mds_skynet_id
        if use_subprocess:
            mds_key, mds_skynet_id = sp_ctx.result
        logger.info(
            "Resource #%s uploaded to S3 bucket %s with key %s",
            resource_id, namespace, mds_key
        )
        return mds_key, mds_skynet_id

    @common_patterns.classproperty
    def queue_size(self):
        """ Number of records currently stored in the "upload_to_mds" table. """
        count_query = """SELECT count() FROM "upload_to_mds" """
        return self.__db.query_one_col(count_query)
