from __future__ import absolute_import, print_function

import gevent.monkey
gevent.monkey.patch_all()

# to avoid PyCharm warnings
try:
    import os
    import gc
    import io
    import re
    import six
    import sys
    import json
    import time
    import uuid
    import signal
    import random
    import logging
    import datetime as dt
    import functools as ft
    import itertools as it
    import traceback as tb
    import collections

    # noinspection PyUnresolvedReferences
    import psutil
    import gevent
    import msgpack
    import gevent.os
    import gevent.lock
    import gevent.event
    import gevent.local
    import gevent.queue
    # noinspection PyUnresolvedReferences
    import setproctitle

    from sandbox.common import fs as common_fs
    from sandbox.common import os as common_os
    from sandbox.common import abc as common_abc
    from sandbox.common import log as common_log
    from sandbox.common import auth as common_auth
    from sandbox.common import data as common_data
    from sandbox.common import rest as common_rest
    from sandbox.common import format as common_format
    from sandbox.common import context as common_context
    from sandbox.common import patterns as common_patterns
    from sandbox.common import itertools as common_itertools
    from sandbox.common import statistics as common_statistics
    import sandbox.common.types.task as ctt
    import sandbox.common.types.misc as ctm
    import sandbox.common.types.client as ctc
    import sandbox.common.types.statistics as ctss

    from sandbox.common import joint
    import sandbox.common.joint.errors as jerrors
    import sandbox.common.joint.client as jclient
    import sandbox.common.joint.server as jserver

    import sandbox.serviceq.state as qstate
    import sandbox.serviceq.types as qtypes
    import sandbox.serviceq.client as qclient
    import sandbox.serviceq.config as qconfig
    import sandbox.serviceq.errors as qerrors
    import sandbox.serviceq.journal as qjournal
    import sandbox.serviceq.election as qelection

    from sandbox.yasandbox.database import mapping
finally:
    pass

try:
    import logbroker.unified_agent.client.python as unified_agent
except ImportError:
    unified_agent = None

# Presumably the file name used for profiler output; not referenced in the visible part of this file - TODO confirm
PROFILE_FILENAME = "serviceq.profile"
# Name of the file in the runtime directory which, when present, pins the primary (see `Server._check_fixed_primary`)
FIXED_PRIMARY_FILENAME = "fixed_primary"

# Subclass of `common_patterns.singleton_property` whose `_lock` attribute is a gevent re-entrant lock
singleton_property = type("singleton_property", (common_patterns.singleton_property,), dict(_lock=gevent.lock.RLock()))


class GreenletSwitcher(object):
    """
    Rate-limited cooperative yield: :meth:`switch` calls `gevent.sleep`,
    but not more often than once per `interval` seconds.
    """

    def __init__(self, interval=1., sleep_time=.01):
        """
        :param interval: minimal number of seconds between two consecutive sleeps
        :param sleep_time: number of seconds passed to `gevent.sleep`
        """
        self.__last_switch = time.time()
        self.__sleep_time = sleep_time
        self.__interval = interval

    def switch(self):
        """ Sleep if at least `interval` seconds passed since the previous sleep (or since creation) """
        current = time.time()
        elapsed = current - self.__last_switch
        if elapsed < self.__interval:
            return
        self.__last_switch = current
        gevent.sleep(self.__sleep_time)


class OwnerToAbcId(dict):
    """
    Lazily filled mapping from owner to ABC id: a missing key is resolved with
    `common_abc.sandbox_group_to_abc_id` and the result is stored in the mapping.
    """

    def __init__(self, config=None):
        """
        :param config: settings object, `qconfig.Registry()` is used if omitted
        """
        super(OwnerToAbcId, self).__init__()
        settings = qconfig.Registry() if config is None else config
        rest_url = settings.client.rest_url
        token = common_fs.read_settings_value_from_file(settings.server.auth.oauth.token)
        self.__sandbox_api = common_rest.Client(base_url=rest_url, auth=common_auth.OAuth(token))

    def __missing__(self, owner):
        """ Resolve ABC id for an owner not seen before, remember it and return it """
        resolved = common_abc.sandbox_group_to_abc_id(owner, rest_client=self.__sandbox_api)
        self[owner] = resolved
        return resolved


# noinspection PyProtectedMember
class Server(jserver.RPC):
    """
    Service Q server
    """
    # Prefix of the process title (where it is applied is outside the visible part of this file)
    PROC_PREFIX = "[sandbox] Service Q"
    # Maps every item of `qtypes.Status` to itself padded to the length of the longest item and capitalized
    # noinspection PyTypeChecker
    PROC_SUFFIXES = {
        status: status.ljust(max(map(len, qtypes.Status))).capitalize()
        for status in qtypes.Status
    }

    # Election helper class; referenced as `self.Contender` (e.g. for `Contender.Retry` in `__elect_primary`)
    Contender = qelection.Contender

    # Class-level default; set to `True` on the instance by `_fixed_primary_checker`
    _stopping = False

    # Database models
    TaskModel = mapping.Task
    GroupModel = mapping.Group
    UserModel = mapping.User
    SemaphoreModel = mapping.Semaphore

    # NOTE(review): presumably a time window in seconds and the critical errors limit within it - TODO confirm
    CRITICAL_ERRORS_INTERVAL = 60
    MAX_CRITICAL_ERRORS = 30
    # Initial value of the `__simultaneous_snapshots` semaphore created in `__init__`
    MAX_SIMULTANEOUS_SNAPSHOTS = 14

    # Parts of snapshot/journal file names: "<prefix>.<state version>.<suffix>" (see the `*_path_to_*` properties)
    STATE_FILENAME_PREFIX = "serviceq"
    SNAPSHOT_FILENAME_SUFFIX = "snapshot"
    JOURNAL_FILENAME_SUFFIX = "journal"
    # NOTE(review): presumably the number of rotated snapshot backups to keep - TODO confirm
    SNAPSHOT_BACKUPS = 9

    # NOTE(review): presumably commit wait timeouts in seconds; usage is outside the visible part - TODO confirm
    PUSH_COMMIT_TIMEOUT = 1
    SYNC_COMMIT_TIMEOUT = 5

    STATISTICS_MAX_CHUNK_SIZE = 1000  # number of signals per chunk
    METRICS_MAX_CHUNK_SIZE = 1000  # number of billing metrics per chunk

    # noinspection PyMissingConstructor
    def __init__(self, config=None):
        """
        Constructor

        Initializes all instance attributes from the settings, installs class-level hooks on `qtypes`
        (semaphore index resolver and API quota defaults) and finally sets up the RPC layer via `_init_joint`.

        :param config: settings object, `qconfig.Registry()` is used if omitted
        """
        self._config = config or qconfig.Registry()
        self._logger = logging.getLogger("qserver")

        # ZooKeeper / election related settings and state
        self.__zk_enabled = self._config.serviceq.zookeeper.enabled
        # hosts string is empty when ZooKeeper is disabled
        self.__zk_hosts = (
            self._config.serviceq.zookeeper.hosts
            if self.__zk_enabled else
            ""
        )
        self.__zk_path = self._config.serviceq.zookeeper.root
        self.__zk_timeout = self._config.serviceq.zookeeper.timeout
        self._contender = None
        self.__contenders = []
        self.__replicated_operation_ids = {}
        self.__replicated_operation_ids_changed = gevent.event.Event()
        self.__contender_created = gevent.event.Event()
        self.__elect_primary_greenlet = None

        # node identity and state
        self.__node_fqdn = self._config.this.fqdn
        self.__node_addr = self.__node_fqdn
        self.__primary_addr = None
        self.__state = qstate.PersistentState()
        self.__api_quota_last_update = 0
        self.__serialized_api_quota = None
        self.__local_state = qstate.LocalState()
        self.__journal = None
        self.__new_journal = None
        self.__prev_role = None
        self.__running = False
        self.__snapshot_restored = False
        self.__replicating = False
        self.__status = None
        self.__gc_interval = self._config.serviceq.server.gc.interval
        self.__current_task_id = None
        # greenlet-local storage (holds e.g. `current_task_owner` and `current_logger`, see `__add_to_queue`)
        self.__local = gevent.local.local()
        # replication settings
        self.__snapshot_chunk_size = self._config.serviceq.server.replication.snapshot_chunk_size
        self.__oplog_ttl = self._config.serviceq.server.replication.oplog_ttl
        self.__oplog_chunk_size = self._config.serviceq.server.replication.oplog_chunk_size
        self.__cleanup_oplog_enabled = True
        # background greenlets, spawned on role change (see `_is_primary`)
        self.__follower = None
        self.__solomon_pusher = None
        self.__statistics = qtypes.Statistics()
        self.__system_statistics = common_os.SystemStatistics()
        # every greenlet started via `__spawn` is registered here
        self.__spawned_greenlets = []
        self.__main_pid = os.getpid()
        self.__critical_errors = common_patterns.namedlist("CriticalErrors", "count last_time")([0, None])
        self.__simultaneous_snapshots = gevent.lock.Semaphore(self.MAX_SIMULTANEOUS_SNAPSHOTS)
        self.__qp_calculator = None
        self.__owners_rating_updater = None
        self.__prequeue_timeout = self._config.serviceq.server.prequeue_timeout
        self.__api_quota_updater = None
        self.__resource_locks_cleaner = None

        self.__dump_snapshot_interval = self._config.serviceq.server.dump_snapshot_interval
        self.__load_snapshot_greenlet = None

        # class-level hook: semaphore indexes are resolved with this instance's `__semaphore_index`
        qtypes.TaskSemaphores.Acquire.semaphore_index = self.__semaphore_index

        self.__quotas_config = self._config.serviceq.server.quotas
        self.__api_quotas_config = self._config.serviceq.server.api_quotas
        # class-level API consumption settings taken from the configuration
        qtypes.ComplexApiConsumption.WINDOW_SIZE = self.__api_quotas_config.api.window_size
        qtypes.ComplexApiConsumption.DEFAULT_QUOTA = self.__api_quotas_config.api.default_quota

        qtypes.ComplexApiConsumption.WEB_WINDOW_SIZE = self.__api_quotas_config.web.window_size
        qtypes.ComplexApiConsumption.WEB_DEFAULT_QUOTA = self.__api_quotas_config.web.default_quota

        self.__mongo_uri = self._config.serviceq.server.mongodb.connection_url

        # shadow the method on the instance with its `ttl_cache` wrapped version
        # (presumably limits how often the cleanup actually runs to once per `cleanup_interval` - TODO confirm)
        cache = {}
        cleanup_interval = self._config.serviceq.server.semaphores_cleanup_interval
        self._cleanup_auto_semaphores = common_patterns.ttl_cache(cleanup_interval, cache)(
            self._cleanup_auto_semaphores
        )

        self.__fixed_primary = None
        self.__fixed_primary_checker = None

        self.__db_operations_greenlets = []

        # runtime options
        self.__skipped_due_disk_space_limit = None
        self.__commit_quorum_size = None
        self.__logging_level = None

        self.__owners_rating_by_pools_cache = {}

        # the base class (`jserver.RPC`) is initialized here instead of a regular `super().__init__()` call
        self._init_joint(self._config.serviceq.server)

    @property
    @common_patterns.singleton
    def __snapshot_path_to_save(self):
        """ Path of the snapshot file of the current state version inside the runtime directory """
        runtime_dir = self._config.common.dirs.runtime
        file_name = ".".join([self.STATE_FILENAME_PREFIX, str(qstate.VERSION), self.SNAPSHOT_FILENAME_SUFFIX])
        return os.path.join(runtime_dir, file_name)

    @property
    @common_patterns.singleton
    def __journal_path_to_save(self):
        """ Path of the journal file of the current state version inside the runtime directory """
        runtime_dir = self._config.common.dirs.runtime
        file_name = ".".join([self.STATE_FILENAME_PREFIX, str(qstate.VERSION), self.JOURNAL_FILENAME_SUFFIX])
        return os.path.join(runtime_dir, file_name)

    @property
    def __state_path_to_load(self):
        """
        Locate snapshot and journal files to load the state from.

        State versions are probed from `qstate.VERSION` down to 1, the first version whose snapshot file exists
        is selected. If no versioned snapshot exists, unversioned file names are returned.

        :return: tuple `(snapshot_path, journal_path)`
        """
        runtime_dir = self._config.common.dirs.runtime

        def state_file(suffix, version=None):
            # "<prefix>.<suffix>" or "<prefix>.<version>.<suffix>" inside the runtime directory
            parts = [self.STATE_FILENAME_PREFIX]
            if version is not None:
                parts.append(str(version))
            parts.append(suffix)
            return os.path.join(runtime_dir, ".".join(parts))

        for version in range(qstate.VERSION, 0, -1):
            versioned_snapshot = state_file(self.SNAPSHOT_FILENAME_SUFFIX, version)
            if os.path.exists(versioned_snapshot):
                return versioned_snapshot, state_file(self.JOURNAL_FILENAME_SUFFIX, version)
        return state_file(self.SNAPSHOT_FILENAME_SUFFIX), state_file(self.JOURNAL_FILENAME_SUFFIX)

    def _check_fixed_primary(self):
        """
        Re-read the fixed primary marker file from the runtime directory.

        If the file exists, both the primary address and the fixed primary are set to "<file content>:<server port>",
        otherwise only the fixed primary is reset to `None`.
        """
        marker_path = os.path.join(self._config.common.dirs.runtime, FIXED_PRIMARY_FILENAME)
        if not os.path.exists(marker_path):
            self.__fixed_primary = None
            return
        with open(marker_path) as marker:
            address = "{}:{}".format(marker.read().strip(), self.__server.port)
        self.__primary_addr = self.__fixed_primary = address

    def _fixed_primary_checker(self):
        """
        Periodically re-check the fixed primary marker while the server is running.

        On change of the fixed primary the status is re-assigned to itself and the server is flagged
        as stopping unless this node is the primary and a fixed primary is set.
        """
        log = self._logger.getChild("fixed_primary_checker")
        log.info("Started")
        while self.__running:
            # noinspection PyBroadException
            try:
                previous = self.__fixed_primary
                self._check_fixed_primary()
                if previous != self.__fixed_primary:
                    log.warning("Address changed: %s", self.__fixed_primary)
                    # re-assignment goes through the `_status` attribute again (its setter is not visible here)
                    self._status = self._status
                    if not (self.__is_primary and self.__fixed_primary):
                        self._stopping = True
            except Exception:
                self.__statistics.error += 1
                log.exception("Unexpected error")
            gevent.sleep(self._config.serviceq.zookeeper.fixed_primary_check_interval)
        log.info("Finished")

    def _init_joint(self, config):
        """
        Initialize the RPC base class and create the joint server with this object's connection handler registered.

        :param config: settings object to build `joint.Context` from
        """
        context = joint.Context(config)
        context.stalled_jobs_timeout = self._config.serviceq.client.timeout
        jserver.RPC.__init__(self, context)
        server = jserver.Server(context)
        self.__server = server
        server.register_connection_handler(self.get_connection_handler())

    @property
    def _node_addr(self):
        """ Address of this node (initialized with the node's FQDN in the constructor) """
        return self.__node_addr

    @property
    def _server_addr(self):
        """
        Address of the primary as a `(host, port)` tuple with integer port.
        Whatever `_primary` raises is propagated.
        """
        address = self._primary
        host, port = address.split(":")
        return host, int(port)

    def __rpc_client(self, node):
        """
        Create an RPC client and connect it to the given node.

        :param node: node address in form "host:port"
        :return: connected `jclient.RPCClientGevent` object
        """
        host, _, port = node.partition(":")
        client = jclient.RPCClientGevent(self._config.serviceq.joint.rpc, host, int(port))
        self._logger.info("Connecting to %s:%s...", host, port)
        client.connect()
        self._logger.info("Connected to %s:%s...", host, port)
        return client

    def __operation_id(self, node, results):
        """
        Request operation id of the node, retrying on RPC errors until success.

        :param node: node address in form "host:port"
        :param results: queue to put the tuple `(operation_id, host)` to
        """
        while True:
            try:
                client = self.__rpc_client(node)
                remote_id = client.call("secondary_operation_id").wait(timeout=1)
                self._logger.info("operation_id of %s is %s", node, remote_id)
                results.put((remote_id, node.split(":")[0]))
                return
            except jerrors.RPCError as exc:
                self.__statistics.election_error += 1
                self._logger.error("Error while requesting operation_id of %s: %s", node, exc)
            # pause before the next attempt
            gevent.sleep(.1)

    def __check_primary(self, node):
        """
        Ask the node for its status.

        :param node: node address in form "host:port"
        :return: `True` if the node reports PRIMARY status, `False` if it reports another one,
            `None` if the request failed with an RPC error
        """
        try:
            client = self.__rpc_client(node)
            remote_status = client.call("secondary_status").wait(timeout=1)
            self._logger.info("status of %s is %s", node, remote_status)
            return remote_status == qtypes.Status.PRIMARY
        except jerrors.RPCError as exc:
            self.__statistics.election_error += 1
            self._logger.error("Error while requesting status of %s: %s", node, exc)
        return None

    @property
    def __quorum_size(self):
        """ Half of the number of known contenders, rounded down """
        return len(self.__contenders) // 2

    def __max_operation_id(self):
        """
        Collect operation ids from other contenders until the quorum size is reached.

        :return: tuple `(max_operation_id, only_unwanted)`: the maximum over the collected ids and the own one and
            a flag, which is true if this node and all the responded nodes are listed in unwanted contenders
        """
        responses = gevent.queue.Queue()
        required = self.__quorum_size
        workers = []
        collected = []
        try:
            for address in self.__contenders:
                if address != self.__node_addr:
                    workers.append(gevent.spawn(self.__operation_id, address, responses))
            only_unwanted = self.__node_fqdn in self.__state.unwanted_contenders
            # the list of workers does not shrink, the loop is left as soon as enough responses are collected
            while workers:
                remote_id, remote_fqdn = responses.get()
                only_unwanted = only_unwanted and remote_fqdn in self.__state.unwanted_contenders
                collected.append(remote_id)
                if len(collected) >= required:
                    break
            return max(common_itertools.chain(collected, self.__state.operation_id)), only_unwanted
        finally:
            self._logger.info("Got %d response(s) from %d contender(s)", len(collected), len(workers))
            gevent.killall(workers)

    def __committed_operation_id(self, quorum_size):
        """
        :param quorum_size: number of nodes which must have an operation replicated
        :return: the `quorum_size`-th largest of the replicated operation ids,
            or `None` if fewer than `quorum_size` nodes are registered
        """
        acknowledged = self.__replicated_operation_ids
        if len(acknowledged) < quorum_size:
            return None
        return sorted(acknowledged.itervalues())[-quorum_size]

    def _wait_commit(self, operation_id=None, timeout=None, forced_quorum_size=None, logger=None):
        """
        Wait until the operation is replicated to the quorum of nodes.

        :param operation_id: operation to wait for, the current operation id of the state by default
        :param timeout: wait timeout in seconds, by default `commit_timeout` of the greenlet-local storage is used
            if set, otherwise the wait is unlimited; zero means do not wait at all
        :param forced_quorum_size: quorum size overriding both the runtime option and the calculated value
        :param logger: logger to use instead of the server's one
        :return: `True` if the operation is committed (or the quorum size is zero), `False` on timeout
        """
        log = logger or self._logger
        if forced_quorum_size is not None:
            quorum_size = forced_quorum_size
        elif self.__commit_quorum_size is not None:
            quorum_size = self.__commit_quorum_size
        else:
            quorum_size = self.__quorum_size
        if quorum_size == 0:
            return True
        if operation_id is None:
            operation_id = self.__state.operation_id
        if timeout is None:
            timeout = getattr(self.__local, "commit_timeout", None)
        if timeout == 0:
            return False
        while operation_id > self.__committed_operation_id(quorum_size):
            if timeout is not None and timeout < 0:
                # time budget is exhausted by the previous iterations
                signalled = False
            else:
                wait_started = time.time()
                signalled = self.__replicated_operation_ids_changed.wait(timeout)
            if not signalled:
                log.warning("Operation #%s timed out", operation_id)
                return False
            if timeout is not None:
                timeout -= time.time() - wait_started
        log.info("Operation #%s commited", operation_id)
        return True

    def __elect_primary(self):
        """
        Run election rounds until a primary is determined.

        :return: address of the primary, or `None` if the maximum operation id could not be collected
            from contenders in `__zk_timeout` seconds
        """
        while True:
            try:
                self.__contenders = self._contender.contenders
                self.__replicated_operation_ids = {}
                self._logger.info("Initiate election with contenders %s", self.__contenders)
                # collect operation ids in a separate greenlet to be able to limit the time of collecting
                max_operation_id_greenlet = gevent.spawn(self.__max_operation_id)
                max_operation_id_greenlet.join(self.__zk_timeout)
                if not max_operation_id_greenlet.dead:
                    max_operation_id_greenlet.kill()
                # a falsy value of the greenlet (e.g. it did not return a result) is treated as a failed round
                max_operation_id, only_unwanted = max_operation_id_greenlet.value or (None, None)
                if max_operation_id is None:
                    return
                self._logger.info(
                    "Maximum operation_id=%s, own operation_id=%s", max_operation_id, self.__state.operation_id
                )
                # contend if the own state is ahead of all the others, or it is on par with the maximum and
                # either this node is not unwanted or `only_unwanted` is set
                contend = (
                    self.__state.operation_id > max_operation_id or
                    self.__state.operation_id == max_operation_id and
                    (only_unwanted or self.__node_fqdn not in self.__state.unwanted_contenders)
                )
                if contend:
                    self._logger.info("Trying to get primary role...")
                primary = self._contender.primary(contend=contend)
                if not primary:
                    gevent.sleep(1)
                    continue
                self._logger.info("Potential primary is %s", primary)
                # accept the candidate if it is this node and it did contend,
                # or if the candidate itself reports PRIMARY status
                if primary == self.__node_addr and contend or self.__check_primary(primary):
                    return primary
            except self.Contender.Retry:
                pass
            gevent.sleep(1)

    @property
    def _primary(self):
        """
        Address of the primary: the fixed one if set, otherwise the elected one.

        :raise qerrors.QElectionInProgress: if the primary is not known yet;
            an election greenlet is started beforehand unless one is already running
        """
        fixed = self.__fixed_primary
        if fixed:
            return fixed
        if self._contender and self.__primary_addr:
            return self.__primary_addr
        election = self.__elect_primary_greenlet
        # the greenlet is tested for truthiness on purpose, exactly as a missing one
        if not (election and not election.dead):
            self.__elect_primary_greenlet = self.__spawn(self._elect_primary)
        raise qerrors.QElectionInProgress("Election of PRIMARY is in progress, try again later")

    def _elect_primary(self):
        """
        Determine the primary address (waiting for the contender to be created first) and remember it.

        :raise qerrors.QTimeout: if the primary cannot be elected; the contender is restarted in this case
        """
        if not self._contender:
            self.__contender_created.wait()
        elected = self.__primary_addr or self.__elect_primary()
        if elected:
            if self.__primary_addr != elected:
                self._logger.info("Primary at %s", elected)
            self.__primary_addr = elected
            return
        self.__statistics.error += 1
        if self._contender:
            self._restart_contender()
        raise qerrors.QTimeout("Cannot elect new primary in {}s".format(self.__zk_timeout))

    @property
    def __is_primary(self):
        """
        Whether this node is the primary, without any side effects.

        :return: with a fixed primary or ZooKeeper enabled - result of comparing the primary address with
            the node address (`None` if ZooKeeper is enabled and the primary is not known), otherwise `True`
        """
        if self.__fixed_primary:
            return self.__fixed_primary == self.__node_addr
        if not self.__zk_enabled:
            return True
        if not self.__primary_addr:
            return None
        return self.__primary_addr == self.__node_addr

    @singleton_property
    def __db_connection(self):
        """ Connection to Mongo DB; connection failures are retried once a second until success """
        while True:
            self._logger.info("Connecting to Mongo DB at %s", self.__mongo_uri)
            try:
                connection = mapping.ensure_connection(
                    uri=self.__mongo_uri,
                    max_pool_size=self._config.serviceq.server.mongodb.max_pool_size
                )
            except (mapping.ConnectionFailure, mapping.ConnectionError) as ex:
                self.__statistics.error += 1
                self._logger.error("Error while connecting to DB: %s", ex)
                gevent.sleep(1)
            else:
                return connection

    def __spawn(self, func, *args, **kws):
        """
        Start a greenlet and register it in the list of spawned greenlets.

        :param func: callable to run, remaining arguments are passed to it
        :return: the started greenlet
        """
        spawned = gevent.spawn(func, *args, **kws)
        self.__spawned_greenlets.append(spawned)
        return spawned

    def __db_operations_executor(self, queue_name):
        """
        Endless loop executing DB operations of the given queue one by one.

        An operation is removed from the queue after successful execution or after an unexpected error;
        on the listed connection/operation errors it stays in the queue and is retried in a second.

        :param queue_name: name of the queue in `self.__state.db_operations`
        """
        logger = self._logger.getChild("db_operations").getChild(queue_name)
        # accessing the property ensures the DB connection is established before executing operations
        _ = self.__db_connection  # noqa
        logger.info("Started")
        try:
            queue = self.__state.db_operations[queue_name]
            while True:
                # NOTE(review): presumably blocks until an operation is available and leaves it in the queue
                operation = queue.peek()
                # noinspection PyBroadException
                try:
                    logger.info("Executing operation %r", operation)
                    operation()
                    logger.info("Operation %r executed successfully, removing from queue", operation)
                    self.__state.pop_db_operation(self, queue_name)
                except (
                    mapping.ConnectionError,
                    mapping.AutoReconnect,
                    mapping.OperationFailure,
                    mapping.OperationError,
                ) as ex:
                    # the operation is kept in the queue and will be retried
                    self.__statistics.error += 1
                    logger.error("Error executing operation %r: %s", operation, ex)
                    gevent.sleep(1)
                except Exception:
                    # unexpected error: drop the operation to not get stuck on it
                    # NOTE(review): this branch is also reached if `pop_db_operation` above raised - then
                    # one more item is popped here; verify this is intended
                    self.__state.pop_db_operation(self, queue_name)
                    self.__statistics.critical += 1
                    logger.exception("Unexpected error")
                    gevent.sleep(1)
        finally:
            logger.info("Stopped")

    @property
    def _is_primary(self):
        """
        Whether this node is the primary; accessing the property also handles role transitions.

        On change of the role since the previous access the background greenlets are started (became primary)
        or killed (lost primary role). While not primary, the follower greenlet is started if it is not running
        and the primary address is known.

        May raise whatever `_primary` raises (e.g. `qerrors.QElectionInProgress`).
        """
        # with a fixed primary just compare addresses; otherwise the node is primary if ZooKeeper is disabled
        # or if the primary is known and it is this node
        is_primary = (
            self.__fixed_primary == self.__node_addr
            if self.__fixed_primary else
            not self.__zk_enabled or self._primary and self.__is_primary
        )
        if is_primary != self.__prev_role:
            self.__prev_role = is_primary
            if is_primary:
                # let the loading of snapshot finish before switching from STARTING to PRIMARY
                if (
                    self._status == qtypes.Status.STARTING and
                    self.__load_snapshot_greenlet and not self.__load_snapshot_greenlet.dead
                ):
                    gevent.wait([self.__load_snapshot_greenlet])
                self._status = qtypes.Status.PRIMARY
                self.__replicating = True
                self.__contenders = [self.__node_addr]
                self.__replicated_operation_ids = {}
                if self.__follower:
                    self.__follower.kill()
                # start background greenlets of the primary
                self.__qp_calculator = self.__spawn(self.qp_calculator)
                self.__owners_rating_updater = self.__spawn(self.owners_rating_updater)

                self.__solomon_pusher = self.__spawn(self.solomon_pusher)
                self.__api_quota_updater = self.__spawn(self.api_quotas_updater)
                self.__resource_locks_cleaner = self.__spawn(self.resource_locks_cleaner)
                # one executor per DB operations queue
                self.__db_operations_greenlets = [
                    self.__spawn(self.__db_operations_executor, queue_name)
                    for queue_name in self.__state.db_operations.queues
                ]
            else:
                # primary role is lost (or was never obtained): stop everything only the primary should run
                self.kill_active_jobs("Not a primary now")
                for g in self.__db_operations_greenlets:
                    if not g.dead:
                        g.kill()
                if self.__solomon_pusher and not self.__solomon_pusher.dead:
                    self.__solomon_pusher.kill()
                if self.__qp_calculator and not self.__qp_calculator.dead:
                    self.__qp_calculator.kill()
                if self.__owners_rating_updater and not self.__owners_rating_updater.dead:
                    self.__owners_rating_updater.kill()
                if self.__api_quota_updater and not self.__api_quota_updater.dead:
                    self.__api_quota_updater.kill()
                if self.__resource_locks_cleaner and not self.__resource_locks_cleaner.dead:
                    self.__resource_locks_cleaner.kill()
                # drop oplogs and reset the locally accumulated API consumption
                self.__state.common_oplog.clear()
                self.__state.oplogs.clear()
                self.__local_state.complex_api_consumption = qtypes.ComplexApiConsumption()
        if not is_primary:
            # (re)start the follower as soon as the primary address is known
            if (not self.__follower or self.__follower.dead) and self.__primary_addr:
                self.__follower = self.__spawn(self.__follow_primary)
        return is_primary

    def __follow_primary(self):
        """
        Follower loop: receive a snapshot from the primary (if required) and then apply its oplog.

        The loop is left when the primary address is unset, or after the contender is restarted
        because the primary is not responding or an unexpected error occurred.
        """
        self._logger.info("Starting follower")
        try:
            client = qclient.Client(self._config)
            while True:
                gevent.sleep()
                if not self.__primary_addr:
                    break
                # noinspection PyBroadException
                try:
                    host, port = self.__primary_addr.split(":")
                    self._logger.info("Primary address is %s", self.__primary_addr)
                    self._status = qtypes.Status.RESTORING
                    # point the client to the current primary
                    # (deleting `_primary_rpc` presumably drops the cached connection - TODO confirm)
                    client._primary_server_addr = (host, int(port))
                    del client._primary_rpc
                    snapshot = client.snapshot(self.__state.operation_id, self.__state.operations_checksum)
                    # the first item is the header: snapshot id and size
                    snapshot_id, snapshot_size = snapshot.next()
                    # if snapshot_size is None, there is no need to fetch the snapshot, just start getting oplog
                    if snapshot_size is not None:
                        self._logger.info(
                            "Receiving snapshot #%s (%s)", snapshot_id, common_format.size2str(snapshot_size)
                        )
                        unpacker = common_data.msgpack_unpacker()
                        size = 0
                        state = None
                        # the data is written to a temporary file, which replaces the snapshot file when complete
                        snapshot_path_tmp = self.__snapshot_path_to_save + "~"
                        with open(snapshot_path_tmp, "wb") as f:
                            for data in snapshot:
                                f.write(data)
                                unpacker.feed(data)
                                size += len(data)
                                self._logger.info(
                                    "Receiving snapshot #%s: %d / %d bytes [%.2f%%]",
                                    snapshot_id, size, snapshot_size, size * 100. / snapshot_size
                                )
                                # integer values are skipped, the first non-integer value is the state
                                for value in unpacker:
                                    if isinstance(value, int):
                                        continue
                                    state = value
                                    break
                                if state is not None:
                                    break

                        if state is None:
                            self._logger.error("Incomplete snapshot")
                            continue
                        self.__rotate_snapshot_backups(self.__snapshot_path_to_save)
                        os.rename(snapshot_path_tmp, self.__snapshot_path_to_save)
                        # start a new journal for the received snapshot
                        self.__journal = qjournal.OperationJournal(
                            self.__journal_path_to_save, force_create=True, logger=self._logger
                        )
                        gevent.sleep()
                        assert size == snapshot_size, "Received {} instead of {} bytes".format(size, snapshot_size)
                        self._logger.info("Snapshot received (%s)", common_format.size2str(size))
                        # decode the state with the garbage collector disabled; reference to the unpacker is dropped
                        with common_context.disabled_gc():
                            self.__state, unpacker = (
                                qstate.PersistentState.decode(state, logger=self._logger),
                                None
                            )
                    self._status = qtypes.Status.SECONDARY
                    self._logger.info("Receiving oplog")
                    gen = client.oplog(snapshot_id, self.__node_addr)
                    commited_operation_id = None
                    while True:
                        # id of the last applied operation is passed to the generator (`None` for the first chunk)
                        chunk = gen.send(commited_operation_id)
                        if chunk is None:
                            break
                        last_operation_id, operations = chunk
                        self._logger.info(
                            "Last operation #%s, received %s operation(s), delay for %s operation(s)",
                            last_operation_id, len(operations), last_operation_id - self.__state.operation_id
                        )
                        for operation in it.imap(qjournal.Operation.decode, operations):
                            method_name = self.__state.operations[operation.method].__name__
                            self._logger.info("Applying operation #%s [%s]", operation.operation_id, method_name)
                            # the operation is journaled first and then applied to the state
                            self.add_to_journal(operation)
                            self.__state.apply(self, *operation)
                            commited_operation_id = operation.operation_id
                            gevent.sleep()
                except qerrors.QSnapshotNotReady:
                    self._logger.warning("Snapshot is not ready yet")
                    gevent.sleep(3)
                except qerrors.QOutdated:
                    self._logger.warning("Snapshot is outdated")
                    # presumably makes the primary send a full snapshot on the next iteration - TODO confirm
                    self.__state.operation_id = None
                except (qerrors.QNotPrimary, qerrors.QRedirect) as ex:
                    self._logger.exception("Primary server changed: %s", ex)
                    # call the getter of the `_primary` property explicitly: it either returns the address
                    # or starts an election and raises, in the latter case wait for the election to finish
                    try:
                        type(self)._primary.__get__(self)
                    except qerrors.QElectionInProgress:
                        self.__elect_primary_greenlet.join()
                    gevent.sleep(.1)
                except (qerrors.QRetry, qerrors.QTimeout, jerrors.CallError) as ex:
                    self.__statistics.error += 1
                    self._logger.warning("Primary server is not responding: %s", ex)
                    self._restart_contender()
                    break
                except Exception:
                    self.__statistics.error += 1
                    self._logger.exception("Unexpected error in follower")
                    self._restart_contender()
                    break
        finally:
            self._logger.info("Follower stopped")

    def __semaphore_index(self, name, capacity, public):
        """
        Resolve identifier of the semaphore, creating an auto semaphore if it does not exist.

        The owner of a new semaphore and the logger are taken from the greenlet-local storage.

        :param name: semaphore name (returned as is if it is a key of existing semaphores)
        :param capacity: capacity of the semaphore to create
        :param public: `public` flag of the semaphore to create
        :return: identifier of the existing or just created semaphore
        """
        known = self.__state.semaphore_index.get(name)
        if known is not None:
            return known
        if name in self.__state.semaphores:
            return name
        self._cleanup_auto_semaphores()
        # create semaphore automatically
        sem_id = self.__state.next_id(self, "semaphore")
        owner = self.__local.current_task_owner
        self.__state.add_semaphore(self, sem_id, name, owner, capacity, True, (), public)
        create_operation = qtypes.DBOperations.CreateSemaphore(
            sem_id, name, owner,
            auto=True, capacity=capacity, public=public
        )
        self.__state.push_db_operation_(self, create_operation, logger=self.__local.current_logger)
        self._wait_commit(logger=self.__local.current_logger)
        return sem_id

    def __add_to_queue(self, task_id, priority, hosts, score, task_info, logger):
        """
        Add the task to the queue, registering semaphores it wants to acquire.

        :param task_id: task identifier
        :param priority: task priority, passed to the state as is
        :param hosts: hosts of the task, passed to the state as is
        :param score: task score, passed to the state as is
        :param task_info: task info object; its `semaphores` is either `ctt.Semaphores`
            or a value which is passed through to the queued task info unchanged
        :param logger: logger to use
        :raise qerrors.QAlreadyExecuting: if the task is in the set of executing tasks
        """
        if task_id in self.__state.executing_tids:
            self.__statistics.already_executing += 1
            raise qerrors.QAlreadyExecuting("Task #{} already executing".format(task_id))
        semaphores = task_info.semaphores
        sem_ids = None
        if not isinstance(semaphores, ctt.Semaphores):
            # not a semaphores description: keep the value for the queued task info as is
            sem_ids, semaphores = semaphores, None
        if semaphores:
            # context for `__semaphore_index`, which is called while constructing `qtypes.TaskSemaphores`
            self.__local.current_task_id = task_id
            self.__local.current_task_owner = self.__state.owners[task_info.owner]
            self.__local.current_logger = logger
            acquires, release = semaphores
            # keep only the first acquire per semaphore name, collecting the rest for the warning
            sem_names = set()
            sem_dups = []
            unique_acquires = []
            for acquire in acquires:
                if acquire[0] in sem_names:
                    sem_dups.append(acquire)
                else:
                    sem_names.add(acquire[0])
                    unique_acquires.append(acquire)
            if sem_dups:
                logger.warning("Filtered duplicated semaphores for task #%s: %s", task_id, sem_dups)
            # FIXME: this may create new auto semaphore(s), need to avoid RC with _cleanup_auto_semaphores
            # NOTE(review): the object is constructed twice and the first result is discarded; kept as is,
            # since it may be a workaround for the race mentioned above - TODO confirm and remove if not
            qtypes.TaskSemaphores(unique_acquires, release)
            semaphores = qtypes.TaskSemaphores(unique_acquires, release)
        prev_semaphores = self.__state.task_semaphores.get(task_id)
        already_in_queue = task_id in self.__state.task_queue_index
        if semaphores or (prev_semaphores and not already_in_queue):
            # temporary semaphores may be already deleted, need to check
            if (
                prev_semaphores and
                all(a.name in self.__state.semaphores for a in prev_semaphores.acquires) and
                prev_semaphores != semaphores
            ):
                logger.warning("Task #%s already acquired other semaphores: %r", task_id, prev_semaphores)
                self.__release_semaphores(task_id, ctt.Status.ENQUEUED, logger, wait_commit=False)
            logger.info("Task #%s wants semaphores %s", task_id, semaphores)
            self.__state.set_task_semaphores(self, task_id, semaphores)
            if semaphores:
                # the list is empty if the task is already registered in `tasks_acquired`
                sem_ids = [
                    acquire.name
                    for acquire in semaphores.acquires
                    if task_id not in self.__state.tasks_acquired
                ]
        self.__state.add_task(
            self, task_id, priority, hosts, qtypes.TaskInfo(
                requirements=task_info.requirements, semaphores=sem_ids, type=task_info.type,
                owner=task_info.owner, enqueue_time=task_info.enqueue_time,
                duration=task_info.duration, client_tags=task_info.client_tags
            ),
            score,
        )

    def _cleanup_auto_semaphores(self):
        """
        Remove auto semaphores that have a falsy `value` and no entry in `wants_semaphore`.

        Each such semaphore is removed from the state; if any were removed, a single
        `DeleteSemaphore` DB operation with all their ids is pushed afterwards.
        """
        self._logger.info("Searching unused auto semaphores")
        removed_ids = []
        for sem_id, sem in self.__state.auto_semaphores.items():
            in_use = sem.value or self.__state.wants_semaphore.get(sem_id)
            if not in_use:
                self._logger.info("Removing unused auto semaphore #%s (%s)", sem_id, sem)
                self.__state.remove_semaphore(self, sem_id)
                removed_ids.append(sem_id)
        if removed_ids:
            self._logger.info("Removing unused auto semaphores %s from DB", removed_ids)
            self.__state.push_db_operation_(self, qtypes.DBOperations.DeleteSemaphore(removed_ids))
        else:
            self._logger.info("No auto semaphores to remove")

    def __collect_garbage(self, switcher=None):
        """
        Drop stale entries from the queue structures of the state.

        Three passes are made: the per-cluster queues (together with the per-host lists
        referencing them), the per-task queues, and the semaphore blockers.

        :param switcher: optional object whose `switch()` is called between units of work
        """
        # pass 1: queues by clusters and hosts
        for cluster_key, clusters in self.__state.cluster_queue.items():
            for cluster, owners in clusters.items():
                for owner, priorities in owners.items():
                    for prio, queue in priorities.items():
                        if switcher:
                            switcher.switch()
                        queue.cleanup()
                        if not queue:
                            # the queue is empty now: unlink it from every host of the cluster
                            for host in cluster:
                                per_host = self.__state.host_queue[host]
                                per_owner = per_host[owner]
                                entries = per_owner[prio]
                                entries[:] = [entry for entry in entries if entry[1] is not queue]
                                if not entries:
                                    del per_owner[prio]
                                if not per_owner:
                                    del per_host[owner]
                                if not per_host:
                                    del self.__state.host_queue[host]
                            del priorities[prio]
                    if not priorities:
                        del owners[owner]
                if not owners:
                    del clusters[cluster]
            if not clusters:
                del self.__state.cluster_queue[cluster_key]

        # pass 2: queues by tasks, keeping only truthy entries
        for queued in self.__state.task_queue.values():
            queued[:] = [item for item in queued if item]
            if switcher:
                switcher.switch()

        # pass 3: tasks that are no longer in the queue must not stay among semaphore waiters
        queued_ids = six.viewkeys(self.__state.task_queue_index)
        for task_id in self.__state.semaphore_blockers.tasks - queued_ids:
            self.__state.semaphore_blockers.remove(task_id)

    def _collect_garbage(self):
        """
        Log connection counters and memory usage, then run one garbage collection pass.

        Any exception raised by the pass is counted in the statistics and logged, not propagated.
        """
        gc_logger = self._logger.getChild("garbage_collector")
        gc_logger.info("%d opened connections active jobs statistics: %r", len(self.counters), self.counters)
        # TODO: https://st.yandex-team.ru/SANDBOX-7583
        process = psutil.Process(os.getpid())
        # use whichever of the two method names this psutil object provides
        memory_getter = getattr(process, "memory_info", getattr(process, "get_memory_info", None))
        memory = memory_getter()
        rss = common_format.size2str(memory.rss)
        vms = common_format.size2str(memory.vms)
        gc_logger.info("Current memory usage: %s RSS, %s VSZ", rss, vms)
        gc_logger.info("Starting garbage collection")
        # noinspection PyBroadException
        try:
            self.__collect_garbage(switcher=GreenletSwitcher(interval=.3, sleep_time=.1))
            gc_logger.info("Garbage collection completed. Statistics: %s", self.__state.stats)
        except Exception:
            self.__statistics.error += 1
            gc_logger.exception("Error while collecting of garbage")

    @property
    def _status(self):
        """Current service role."""
        return self.__status

    @_status.setter
    def _status(self, status):
        """Store the new role, log the switch if the role changed and refresh the process title."""
        previous = self.__status
        self.__status = status
        suffix = self.PROC_SUFFIXES.get(status, "")
        if self.__fixed_primary:
            suffix = "FIXED " + suffix
        if not previous or previous != status:
            origin = " from {}".format(previous) if previous else ""
            self._logger.info("Service role switched%s to %s", origin, status)
        process_title = "{} {}".format(self.PROC_PREFIX, suffix)
        setproctitle.setproctitle(process_title)

    def start(self):
        """
        Start the RPC server, begin loading the snapshot and spawn the background greenlets.

        :return: `self`
        """
        self._logger.info("Starting")
        # noinspection PyAttributeOutsideInit
        self._status = qtypes.Status.STARTING
        self.__server.start()
        # the node address is built from the port the server reports after it has been started
        self.__node_addr = "{}:{}".format(self.__node_fqdn, self.__server.port)
        self._check_fixed_primary()
        super(Server, self).start()
        # the snapshot is loaded in a separate greenlet; wait here until it sets `operation_id_ready`
        operation_id_ready = gevent.event.Event()
        self.__load_snapshot_greenlet = self.__spawn(self.load_snapshot, operation_id_ready=operation_id_ready)
        operation_id_ready.wait()
        self._logger.info("Started")
        self.__running = True
        self.__replicating = True
        self.__fixed_primary_checker = self.__spawn(self._fixed_primary_checker)
        # the contender and its watchdog are created only with ZooKeeper enabled and no fixed primary
        if not self.__fixed_primary and self.__zk_enabled:
            self._contender = self.Contender(
                self.__zk_hosts, self.__zk_path, self.__node_addr,
                timeout=self.__zk_timeout, on_start=self._on_contender_start,
                logger=self._logger, lock_cls=gevent.lock.RLock
            )
            self._contender.start()
            self.__spawn(self.__contender_watchdog)
            self.__contender_created.set()
        # optional periodic jobs: each one is spawned only if its interval is configured
        if self.__gc_interval:
            self.__spawn(self.gc)
        if self.__dump_snapshot_interval:
            self.__spawn(self.snapshot_dumper)
        self.__spawn(self.system_statistics_collector)
        self.__spawn(self.statistics_pusher)
        return self

    def __contender_watchdog(self):
        """
        Wait for the contender to terminate and restart it, for as long as the server is running.

        After each restart the known primary address and previous role are reset, replication
        is switched off, the status becomes `TRANSIENT` and the follower (if any) is killed.
        Errors are logged together with their traceback and the loop goes on.
        """
        self._logger.info("Contender watchdog started")
        while self.__running:
            try:
                self._contender.wait()
                self.__primary_addr = None
                self._logger.warning("Contender process terminated")
                if not self.__running:
                    # the termination was caused by a regular shutdown, nothing to restart
                    break
                self._contender.restart()
                self.__prev_role = None
                self.__replicating = False
                self._status = qtypes.Status.TRANSIENT
                if self.__follower:
                    self.__follower.kill()
            except Exception as ex:
                # keep the traceback: the message alone is not enough to diagnose a watchdog failure
                self._logger.exception("Error in contender watchdog: %s", ex)

    def on_stalled_jobs(self, stalled):
        """
        Report stalled jobs: log the current stack of each one and count it as an error.

        Nothing is reported when the server is not running.

        :param stalled: iterable of `(job_id, greenlet, timestamp)` triples
        """
        if self.__running:
            for job_id, greenlet, _ in stalled:
                stack_lines = tb.format_stack(greenlet.gr_frame)
                self.__statistics.error += 1
                self._logger.error("Job #%s is stalled:\n%s", job_id, "".join(stack_lines))
        # self.stop()

    def _restart_contender(self):
        """
        Restart the contender, if there is one, and reset the election-related state.

        The previous role and the primary address are cleared, the status becomes
        `TRANSIENT` and the follower (if any) is killed.
        """
        if self._contender:
            self._contender.restart()
            self.__prev_role = None
            self.__primary_addr = None
            self._status = qtypes.Status.TRANSIENT
            if self.__follower:
                self.__follower.kill()

    def _on_contender_start(self, contender):
        """
        Set up the contender side once it has been started: own process title, logging and signals.

        Passed to the contender as its `on_start` callback; presumably runs inside the forked
        contender process (it calls `contender.on_fork`) - confirm against the Contender class.

        :param contender: the contender object being started
        :return: logger writing to the file configured in `serviceq.zk_log`,
            named after the contender's module
        """
        self._contender = None
        contender.on_fork(self.__server)
        setproctitle.setproctitle("{} Contender".format(self.PROC_PREFIX))
        log = self._config.serviceq.zk_log
        root = logging.getLogger()
        # iterate over copies: removal mutates the underlying lists; explicit loops are used
        # because `map()` is lazy on Python 3 and would not remove anything there
        for handler in root.handlers[:]:
            root.removeHandler(handler)
        for log_filter in root.filters[:]:
            root.removeFilter(log_filter)
        logger = common_log.setup_log(
            logging.FileHandler(os.path.join(log.root, log.name)), log.level, without_signals=True
        ).getChild(contender.__module__)

        # NOTE(review): the instance is not kept; presumably the signaler registers itself - confirm
        common_statistics.Signaler(
            common_statistics.ServerSignalHandler(config=qconfig.Registry()),
            logger=logger,
            component=ctm.Component.SERVICEQ,
            config=qconfig.Registry()
        )
        return logger

    def loop(self):
        """
        Main loop: about once a second handle a pending stop request or poll the primary status.

        Election-related errors raised while evaluating `_is_primary` are ignored.
        """
        while self.__running:
            pause = random.uniform(0.9, 1.1)
            gevent.sleep(pause)
            if self.__main_pid != os.getpid():
                # a process other than the one recorded in `__main_pid` cannot exit
                # from the main greenlet, so it sleeps forever
                gevent.sleep(float("inf"))
            if self._stopping:
                self.stop()
            else:
                try:
                    # evaluate the `_is_primary` descriptor for its side effects only
                    type(self)._is_primary.__get__(self)
                except (self.Contender.Retry, qerrors.QElectionInProgress):
                    pass

    def solomon_pusher(self):
        """
        Periodically push capacity, value and blocked size of every non-auto semaphore to Solomon.

        Returns immediately if any of project/cluster/service or the token is not configured.
        The loop ends when the server stops running or is no longer primary; errors of a
        single iteration are logged and do not stop the loop.
        """
        settings = self._config.serviceq.server.statistics.semaphores
        solomon_token = common_fs.read_settings_value_from_file(settings.solomon_token)
        common_labels = {
            "project": settings.project,
            "cluster": settings.cluster,
            "service": settings.service
        }
        if not (all(common_labels.itervalues()) and solomon_token):
            self.__solomon_pusher = None
            return self._logger.warning("Solomon pusher is not configured")
        self._logger.info("Starting Solomon pusher")
        solomon = common_rest.Client(settings.solomon_url, auth=solomon_token) >> common_rest.Client.PLAINTEXT
        try:
            last_running = int(time.time())
            while self.__running:
                remaining = last_running + settings.interval - time.time()
                gevent.sleep(max(0, remaining))
                if not self.__is_primary:
                    break
                last_running = int(time.time())
                # noinspection PyBroadException
                try:
                    self._logger.info("Collecting semaphore metrics")
                    sensors = []
                    for sem_id, sem in self.__state.semaphores.iteritems():
                        if sem.auto:
                            continue
                        blocked = self.__state.semaphore_blockers.size(sem_id)
                        readings = (
                            ("semaphore_capacity", sem.capacity),
                            ("semaphore_value", sem.value),
                            ("semaphore_blocked", blocked),
                        )
                        for kind, reading in readings:
                            sensors.append({
                                "labels": {"sensor": sem.name, "type": kind},
                                "ts": last_running,
                                "value": reading
                            })
                    self._logger.info("Sending semaphore metrics to Solomon")
                    push_path = "push?project={project}&cluster={cluster}&service={service}".format(**common_labels)
                    solomon[push_path](sensors=sensors)
                except Exception:
                    self._logger.exception("Error while pushing semaphore metrics to Solomon")
        finally:
            self._logger.info("Solomon pusher stopped")

    def statistics_pusher(self):
        """
        Periodically collect statistics signals and send them through the unified agent.

        Every `settings.interval` seconds the accumulated statistics are dumped and turned into
        signals grouped by type. Queue, call, quota, API consumption, replication and billing
        data are collected only while the status is `PRIMARY`; system resource points are
        collected in any role. Nothing is sent if `unified_agent` is `None`. Billing metrics
        are sent only on the production installation.

        Errors of a single iteration are logged and do not stop the loop; unified agent
        sessions are closed even if sending fails.
        """
        settings = self._config.serviceq.server.statistics
        if not settings.enabled:
            return self._logger.warning("Statistics pusher is not enabled")

        self._logger.info("Starting statistics pusher")

        owner_re = re.compile(r"^[\w\-]+$")

        owner_to_abc_id = OwnerToAbcId(self._config)

        try:
            last_billing_send = last_api_consumption_push = last_qp_push = last_running = int(time.time())
            while self.__running:
                gevent.sleep(max(0, last_running + settings.interval - time.time()))
                last_running = int(time.time())
                # noinspection PyBroadException
                try:
                    self._logger.info("Collecting statistics")
                    data = self.__statistics.dump()
                    signals_by_type = collections.defaultdict(list)
                    billing_metrics = []
                    switcher = GreenletSwitcher()
                    if self._status == qtypes.Status.PRIMARY:
                        # collect counters
                        for timestamp, counters in data.counters_history:
                            for name, count in counters.iteritems():
                                signals_by_type[ctss.SignalType.SERVICEQ_COUNTER].append(dict(
                                    date=timestamp,
                                    timestamp=timestamp,
                                    name=name,
                                    count=count,
                                    server=self._config.this.fqdn
                                ))
                                switcher.switch()

                        # collect queue operations
                        timestamp = int(time.time())
                        for tag in data.wants_task.viewkeys() | data.got_task.viewkeys():
                            signals_by_type[ctss.SignalType.SERVICEQ_TASK].append(dict(
                                date=timestamp,
                                timestamp=timestamp,
                                tag=tag,
                                wants=data.wants_task.get(tag, 0),
                                got=data.got_task.get(tag, 0),
                                server=self._config.this.fqdn
                            ))
                            switcher.switch()

                        # collect calls
                        for call_info in data.calls:
                            signals_by_type[ctss.SignalType.SERVICEQ_CALL].append(dict(
                                date=int(call_info.start),
                                timestamp=int(call_info.start),
                                method=call_info.name,
                                duration=int(call_info.duration * 1000),
                                server=self._config.this.fqdn
                            ))
                            switcher.switch()

                        # SANDBOX-5705: tasks whose execution was finished
                        for execution in data.done_tasks:
                            signals_by_type[ctss.SignalType.QUOTA_CONSUMPTION_DETAILS].append(dict(
                                date=execution.finished,
                                timestamp=execution.finished,
                                task_id=execution.id,
                                real_consumption=execution.consumption,  # already downscaled on execution finish
                                ram=execution.ram,
                                cpu=execution.cpu,
                                hdd=execution.hdd,
                                ssd=execution.ssd,
                                pool=execution.pool,
                            ))

                        # collect computing resources consumptions
                        if last_running - last_qp_push >= self.__quotas_config.consumption.calculation_interval:
                            last_qp_push = last_running
                            for owner, pool_consumption in self.__state.consumptions.iteritems():
                                for pool, consumption in pool_consumption.iteritems():
                                    # FIXME: don't push statistics for wrong owners until its will be removed
                                    owner_str = self.__state.owners[owner]
                                    if not owner_re.match(owner_str):
                                        continue
                                    pool_str = None if pool is None else self.__state.quota_pools.pools[pool]
                                    real_consumption, future_consumption = map(qstate.quota_downscale, consumption.qp)
                                    signals_by_type[ctss.SignalType.QUOTA_CONSUMPTION].append(dict(
                                        date=last_running,
                                        timestamp=last_running,
                                        owner=owner_str,
                                        pool=pool_str,
                                        quota=qstate.quota_downscale(self.__state.quotas[owner].get(pool, (0,))[0]),
                                        real_consumption=real_consumption,
                                        future_consumption=future_consumption,
                                        executing_jobs=len(consumption.executing_jobs),
                                        ram=consumption.ram,
                                        cpu=consumption.cpu,
                                        hdd=consumption.hdd,
                                        ssd=consumption.ssd,
                                    ))
                                switcher.switch()

                        # collect api consumptions
                        if last_running - last_api_consumption_push >= self.__api_quotas_config.statistics.interval:
                            last_api_consumption_push = last_running
                            api_consumptions_types = (
                                (
                                    self.__local_state.complex_api_consumption.api_consumption,
                                    ctt.RequestSource.API
                                ),
                                (
                                    self.__local_state.complex_api_consumption.web_api_consumption,
                                    ctt.RequestSource.WEB
                                )
                            )
                            for api_consumption, source in api_consumptions_types:
                                for login, consumption_info in api_consumption.consumption.iteritems():
                                    consumption, quota = consumption_info
                                    signals_by_type[ctss.SignalType.API_QUOTA_CONSUMPTION].append(dict(
                                        date=last_running,
                                        timestamp=last_running,
                                        login=login,
                                        consumption=consumption,
                                        quota=quota,
                                        source=source
                                    ))

                        # collect replication delays (`timestamp` is the one taken for queue operations)
                        for addr, operation_id in self.__replicated_operation_ids.iteritems():
                            signals_by_type[ctss.SignalType.SERVICEQ_REPLICATION_DELAY].append(dict(
                                date=timestamp,
                                timestamp=timestamp,
                                delay=self.__state.operation_id - operation_id,
                                follower=addr.partition(":")[0],
                                server=self._config.this.fqdn
                            ))

                        # collect billing metrics
                        if last_running - last_billing_send >= self.__quotas_config.billing.send_interval:
                            last_billing_send = last_running
                            billing_metrics = self.__state.collect_billing_metrics(
                                self, owner_to_abc_id, switcher, self._config
                            )
                            self._logger.info("%s billing metrics are collected", len(billing_metrics))

                    # collect system statistics
                    timestamp = int(time.time())
                    for point in self.__system_statistics:
                        signals_by_type[ctss.SignalType.SERVICEQ_SYSTEM_RESOURCES].append(dict(
                            date=timestamp,
                            timestamp=timestamp,
                            user_cpu=point.user_cpu,
                            system_cpu=point.system_cpu,
                            rss=point.rss >> 20,
                            vms=point.vms >> 20,
                            role=self._status,
                            server=self._config.this.fqdn
                        ))
                        switcher.switch()

                    if unified_agent is not None:
                        self._logger.info("Sending statistics")
                        uri = self._config.common.unified_agent["serviceq_statistics"]["uri"]
                        ua_client = unified_agent.Client(uri, log_level=logging.DEBUG)
                        ua_session = ua_client.create_session()
                        try:
                            for signal_type, signals in signals_by_type.iteritems():
                                for chunk in common_itertools.chunker(signals, self.STATISTICS_MAX_CHUNK_SIZE):
                                    self._logger.info(
                                        "Sending %s signal(s) of type %s", len(chunk), signal_type
                                    )
                                    ua_session.send(json.dumps({signal_type: chunk}) + "\n", time.time())
                        finally:
                            # release the session even if serialization or sending fails midway
                            ua_session.close()

                        if billing_metrics and self._config.common.installation == ctm.Installation.PRODUCTION:
                            self._logger.info("Sending billing metrics")
                            uri = self._config.common.unified_agent["billing_metrics"]["uri"]
                            ua_client = unified_agent.Client(uri, log_level=logging.DEBUG)
                            ua_session = ua_client.create_session()
                            try:
                                for metric in common_itertools.chunker(billing_metrics, self.METRICS_MAX_CHUNK_SIZE):
                                    ua_session.send(json.dumps(metric) + "\n", time.time())
                            finally:
                                # release the session even if serialization or sending fails midway
                                ua_session.close()

                except Exception:
                    self._logger.exception("Error while sending statistics")

        finally:
            self._logger.info("Statistics pusher stopped")

    def system_statistics_collector(self):
        """
        Call `checkpoint()` on the system statistics every `system_collector_interval` seconds.

        Returns immediately (after a warning) if statistics are disabled in the config.
        """
        statistics_settings = self._config.serviceq.server.statistics
        if not statistics_settings.enabled:
            return self._logger.warning("System statistics collector is not enabled")
        period = self._config.serviceq.server.statistics.system_collector_interval
        self._logger.info("System statistics collector started")
        try:
            previous_run = time.time()
            while self.__running:
                remaining = previous_run + period - time.time()
                gevent.sleep(max(0, remaining))
                previous_run = int(time.time())
                self.__system_statistics.checkpoint()
        finally:
            self._logger.info("System statistics collector stopped")

    def __sort_owners_rating(self, logger):
        """
        Sort the owners rating in the state and log how many owners were sorted and the time spent.

        :param logger: logger to report to
        """
        with common_context.Timer() as elapsed:
            self.__state.sort_owners_rating(self)
        owners_count = len(self.__state.owners_rating)
        logger.info("Rating for %s owner(s) sorted in %s", owners_count, elapsed)

    def owners_rating_updater(self):
        """
        Re-sort the owners rating every `owners_rating_update_interval` seconds.

        The loop ends when the server stops running or is no longer primary.
        """
        self._logger.info("Starting owners rating updater")
        period = self.__quotas_config.owners_rating_update_interval
        try:
            previous_run = time.time()
            while self.__running:
                remaining = previous_run + period - time.time()
                gevent.sleep(max(0, remaining))
                if not self.__is_primary:
                    break
                previous_run = int(time.time())
                self.__sort_owners_rating(self._logger)
        finally:
            self._logger.info("Owners rating updater stopped")

    def __calculate_consumption(self, last_running, logger=None):
        """
        Recalculate quota consumptions in the state and re-sort the owners rating.

        :param last_running: timestamp passed to the state's `calculate_qp`
        :param logger: logger to report to; the server's own logger is used when omitted
        """
        log = logger or self._logger
        log.info("Calculating quota consumptions")
        calculated = dict(self.__state.calculate_qp(self, last_running))
        log.info("%s quota consumption(s) was calculated", len(calculated))
        self.__sort_owners_rating(log)

    def qp_calculator(self):
        """
        Recalculate quota consumptions every `consumption.calculation_interval` seconds.

        The loop ends when the server stops running or is no longer primary.
        """
        self._logger.info("Starting quota consumption calculator")
        period = self.__quotas_config.consumption.calculation_interval
        try:
            previous_run = time.time()
            while self.__running:
                remaining = previous_run + period - time.time()
                gevent.sleep(max(0, remaining))
                if not self.__is_primary:
                    break
                previous_run = int(time.time())
                self.__calculate_consumption(previous_run)
        finally:
            self._logger.info("Quota consumption calculator stopped")

    def gc(self):
        """
        Run `_collect_garbage` once per `__gc_interval` seconds while the server is running.

        The elapsed time is checked once a second; errors are counted in the statistics and logged.
        """
        last_run = time.time()
        self._logger.info("Garbage collector started with period of %ss", self.__gc_interval)
        while self.__running:
            gevent.sleep(1)
            elapsed = time.time() - last_run
            if elapsed >= self.__gc_interval:
                last_run = time.time()
                # noinspection PyBroadException
                try:
                    self._collect_garbage()
                except Exception:
                    self.__statistics.error += 1
                    self._logger.exception("Error while collecting garbage")

    def stop(self, graceful=False):
        """
        Stop the server: finish replication, stop the contender, kill spawned greenlets,
        flush the journals and shut the RPC server down.

        Does nothing if the server is not running.

        :param graceful: forwarded to the parent class' `stop`
        """
        if not self.__running:
            return
        self._logger.info("Stopping")
        self.__running = False
        self.__snapshot_restored = False
        # remember the role held before switching to STOPPING: it decides whether to wait for commit
        status = self._status
        # noinspection PyAttributeOutsideInit
        self._status = qtypes.Status.STOPPING
        # wait for replication complete
        if status == qtypes.Status.PRIMARY:
            self._wait_commit(
                timeout=self._config.serviceq.server.replication.stopping_timeout,
                forced_quorum_size=len(self.__contenders) - 1
            )
        self.__replicating = False
        # the contender exists only with ZooKeeper enabled and no fixed primary
        if not self.__fixed_primary and self.__zk_enabled:
            if self._contender:
                self._contender.stop()
        for g in self.__spawned_greenlets:
            if g and not g.dead:
                g.kill()
        # flush whichever journals are set
        for journal in (self.__journal, self.__new_journal):
            if journal:
                journal.flush()
        self.__server.stop()
        self.__server.join()
        super(Server, self).stop(graceful=graceful)
        super(Server, self).join()
        self._logger.info("Stopped")

    @staticmethod
    def __rpc_method(method_type=jserver.RPC.simple, allow_secondary=False, account=True):
        """
        Build a decorator that guards a method with running/primary checks and wraps it as an RPC method.

        The produced wrapper raises `qerrors.QRetry` when the server is not running, when the
        local snapshot is not restored yet or when the primary location is unknown, and
        `qerrors.QRedirect` when this node is not primary but the primary is known.

        :param method_type: callable invoked as `method_type(wrapper, original_method)`; its result
            replaces the decorated method
        :param allow_secondary: additionally pass a `secondary_<name>` wrapper, which skips the
            primary check, to `method_type` (presumably this registers it - confirm in `jserver.RPC`)
        :param account: record start time and duration of every call in the statistics
        :return: decorator to apply to a method
        """
        def decorator(method):
            original_method = method
            if account:
                # rebind `method` to a variant that reports the call to the statistics
                def method(self, *args, **kws):
                    start = time.time()
                    try:
                        return original_method(self, *args, **kws)
                    finally:
                        duration = time.time() - start
                        self.__statistics.call(original_method.__name__, start, duration)

            @ft.wraps(original_method)
            def wrapper(self, *args, **kws):
                if not self.__running:
                    self.__statistics.error += 1
                    raise qerrors.QRetry("Not running")
                if self._is_primary:
                    if not self.__snapshot_restored:
                        raise qerrors.QRetry("Local snapshot is not restored yet")
                    return method(self, *args, **kws)
                else:
                    # not primary: point the caller to the primary if its location is known
                    primary = self._primary
                    if primary:
                        raise qerrors.QRedirect(primary)
                    else:
                        self.__statistics.error += 1
                        raise qerrors.QRetry("Primary location is unknown")

            if allow_secondary:
                @ft.wraps(original_method)
                def secondary_wrapper(self, *args, **kws):
                    if not self.__running:
                        self.__statistics.error += 1
                        raise qerrors.QRetry("Not running")
                    return method(self, *args, **kws)
                # `ft.wraps` copied the original name; give the secondary variant its own one
                secondary_wrapper.__name__ = "secondary_{}".format(original_method.__name__)
                method_type(secondary_wrapper, original_method)
            return method_type(wrapper, original_method)
        return decorator
    # unwrap the plain function from the staticmethod object so that it can be called
    # as a decorator in the rest of the class body
    # noinspection PyUnresolvedReferences
    __rpc_method = __rpc_method.__func__

    def __push(self, task_id, priority, hosts, task_info=None, score=None, logger=None):
        """
        Add a task to the queue, update its queue entry or remove it from the queue.

        When both `priority` and `hosts` are `None` the task is removed (if it is queued).
        Otherwise the task is added or updated; values missing from the request are taken
        from the existing queue entry, if there is one.

        :param task_id: identifier of the task
        :param priority: new priority or `None`
        :param hosts: new hosts or `None`
        :param task_info: encoded task info, decoded with `qtypes.TaskInfo.decode`; required for new tasks
        :param score: score or `None`
        :param logger: logger to report to; the server's own logger is used when omitted
        """
        logger = self._logger if logger is None else logger
        info_missing = task_info is None
        task_info = qtypes.TaskInfo.decode(task_info)
        requirements = task_info.requirements
        semaphores = task_info.semaphores and ctt.Semaphores(*task_info.semaphores)
        task_type, owner = task_info.type, task_info.owner
        enqueue_time, duration = task_info.enqueue_time, task_info.duration
        client_tags = task_info.client_tags
        queue_item = self.__state.task_queue_index.get(task_id)

        if priority is None and hosts is None:
            # removal request: drop wanted semaphores first, then the task itself
            if queue_item is not None and queue_item.task_ref:
                logger.info("Removing task #%s from the queue", task_id)
                wanted = self.__state.task_semaphores.get(task_id)
                if wanted:
                    wanted_names = [acquire.name for acquire in wanted.acquires]
                    self.__state.del_wanted_semaphores(self, task_id, wanted_names)
                    self.__state.set_task_semaphores(self, task_id, None)
                self.__state.del_task(self, task_id)
            return

        if queue_item is not None:
            logger.info("Updating task #%s in the queue", task_id)
        else:
            logger.info("Adding task #%s to the queue", task_id)
            if info_missing:
                logger.warning("Cannot enqueue task #%s without info", task_id)
                return

        # convert strings to integer indexes
        if hosts is not None and score is None:
            hosts = [[host[0], self.__state.add_host_(self, host[1])] for host in hosts]
        if owner is not None:
            owner = self.__state.add_task_owner_(self, owner)
            if self.__state.quotas.get(owner) is None:
                # an owner without quotas gets the default quota of every pool and of the "no pool" entry
                known_pools = self.__state.quota_pools.pools.index.items()
                for _pool, pool_index in it.chain(known_pools, [(None, None)]):
                    default_quota = self.__state.quota_pools.default(pool_index)
                    self.__state.set_quota(self, owner, default_quota, pool_index, True)
        if task_type is not None:
            task_type = self.__state.add_task_type_(self, task_type)
        if client_tags is not None:
            client_tags = self.__state.add_client_tags_(self, client_tags)

        if queue_item is not None and queue_item.task_ref:
            # fill everything the request left out from the existing queue entry
            current = queue_item.task_info
            if hosts is None:
                hosts = queue_item.hosts
            elif priority is None:
                priority = queue_item.priority
            if score is None and not isinstance(hosts, list):
                score = queue_item.score
            if requirements is None:
                requirements = current.requirements
            if semaphores is None:
                semaphores = current.semaphores
            if task_type is None:
                task_type = current.type
            if owner is None:
                owner = current.owner
            if enqueue_time == 0:
                enqueue_time = current.enqueue_time
            if duration == 0:
                duration = current.duration
            if client_tags is None:
                client_tags = current.client_tags

        if hosts is None:
            return
        merged_info = qtypes.TaskInfo(
            requirements=requirements, semaphores=semaphores,
            type=task_type, owner=owner, enqueue_time=enqueue_time,
            duration=duration, client_tags=client_tags
        )
        # noinspection PyTypeChecker
        self.__add_to_queue(task_id, priority, hosts, score, merged_info, logger)

    @__rpc_method(allow_secondary=True)
    def get_hosts(self):
        """Return the encoded form of the hosts collection kept in the current state."""
        hosts = self.__state.hosts
        return hosts.encode()

    @__rpc_method()
    def add_hosts(self, hosts):
        """
        Pass `hosts` to the state's `add_hosts_` operation and return the encoded hosts collection.

        :param hosts: hosts to add, forwarded to the state untouched
        :return: result of `encode()` called on the state's hosts collection after the addition
        """
        self.__state.add_hosts_(self, hosts)
        encoded_hosts = self.__state.hosts.encode()
        return encoded_hosts

    @__rpc_method(method_type=jserver.RPC.full)
    def push(self, task_id, priority, hosts, task_info=None, score=None, job=None):
        """
        Push a single task update to the queue and wait until the change is committed.

        :param task_id: id of the task to push
        :param priority: task priority, forwarded to the internal push
        :param hosts: hosts for the task, forwarded to the internal push
        :param task_info: optional task information, forwarded to the internal push
        :param score: optional score, forwarded to the internal push
        :param job: RPC job object, its logger is used when provided
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        # The commit timeout is set before the push so that the wait below uses the push-specific value
        self.__local.commit_timeout = self.PUSH_COMMIT_TIMEOUT
        self.__push(task_id, priority, hosts, task_info, score=score, logger=logger)
        self._wait_commit(logger=logger)

    def _reset(self, logger=None):
        """
        Replace both persistent and local states with fresh ones and restart DB operations executors.

        :param logger: optional logger, a warning about the reset is written to it when provided
        """
        if logger:
            logger.warning("RESETTING THE QUEUE!")
        self.__state = qstate.PersistentState()
        self.__local_state = qstate.LocalState()
        # Explicit loop instead of `map()` called for its side effect: a lazy `map` (Python 3 semantics)
        # would never be consumed and the old executors would silently stay alive.
        for greenlet in self.__db_operations_greenlets:
            if not greenlet.dead:
                greenlet.kill()
        # One executor per DB operations queue of the freshly created state
        self.__db_operations_greenlets = [
            self.__spawn(self.__db_operations_executor, queue_name)
            for queue_name in self.__state.db_operations.queues
        ]

    @__rpc_method(method_type=jserver.RPC.full)
    def sync(self, data, reset=False, job=None):
        """
        Push a batch of items to the queue and wait for the last pushed operation to be committed.

        :param data: sequence of argument tuples, each one is unpacked into the internal push
        :param reset: if true and resets are allowed by the server config, the queue is reset first
        :param job: RPC job object, its logger is used when provided
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        if reset and self._config.serviceq.server.allow_reset:
            self._reset(logger)
        self.__local.commit_timeout = 0
        committed_up_to = None
        for entry in data:
            try:
                self.__push(*entry, logger=logger)
            except qerrors.QAlreadyExecuting:
                logger.warning("Task #%s already executing", entry[0])
            else:
                # Remember the operation id only for the items which were actually pushed
                committed_up_to = self.__state.operation_id
            gevent.sleep()  # Allow event loop to execute another pending greenlet if any.
        self._wait_commit(operation_id=committed_up_to, timeout=self.SYNC_COMMIT_TIMEOUT)
        logger.info("Sync for %d items completed. Stats: %s", len(data), self.__state.stats)

    @staticmethod
    def __rollback_semaphores_changes(rollback, task_id, only_check):
        """
        Undo semaphore changes recorded in `rollback` unless the caller was only checking.

        :param rollback: iterable of 3-item records, the second item is a semaphore object and the third one
            is the weight to subtract from its value
        :param task_id: id of the task to remove from every semaphore's `tasks` mapping
        :param only_check: if true nothing is rolled back
        """
        if only_check:
            return
        for record in rollback:
            _, semaphore, weight = record
            semaphore.value -= weight
            semaphore.tasks.pop(task_id)

    def __critical_error(self, logger):
        """
        Account a critical error and request the server stop when too many of them were collected.

        Must be called from an exception handler because the error is logged with `logger.exception`.

        :param logger: logger to report the error to
        """
        self.__statistics.error += 1
        now = dt.datetime.now()
        errors = self.__critical_errors
        errors.count += 1
        if errors.count >= self.MAX_CRITICAL_ERRORS:
            logger.critical(
                "Maximum number of critical errors reached (%s/%s), going to stop forcedly",
                errors.count, self.MAX_CRITICAL_ERRORS
            )
            self._stopping = True
            return
        if errors.last_time is None:
            errors.last_time = now
        seconds_since_last = (now - errors.last_time).total_seconds()
        if seconds_since_last > self.CRITICAL_ERRORS_INTERVAL:
            # The previous errors are old enough - start counting from scratch
            errors.last_time = now
            errors.count = 0
        logger.exception("Unexpected error (%s)", errors)

    def __acquire_semaphores(self, logger, task_id):
        """
        Acquire semaphores for the task unless they are acquired already.

        :param logger: logger to report the progress to
        :param task_id: id of the task to acquire semaphores for
        :return: True if semaphores are acquired (now or earlier), False on conflict
        """
        if task_id in self.__state.tasks_acquired:
            logger.info("Semaphores for task #%s already acquired", task_id)
            return True
        logger.info("Acquiring semaphores for task #%s", task_id)
        # Only the first item of the state's answer is used here
        acquired = self.__state.acquire_semaphores(self, task_id)[0]
        if not acquired:
            logger.info(
                "Conflict occurred while acquiring semaphores for task #%s, can acquire only %r", task_id, acquired
            )
            return False
        logger.info("Semaphores %r for task #%s successfully acquired", acquired, task_id)
        self.__state.push_db_operation_(self, qtypes.DBOperations.UpdateTask(task_id, True), logger=logger)
        return True

    def __task_to_execute_it(self, queue, pool=None, blockers_state=None):
        """
        Iterate over items of a host queue: the unlimited owner first, then owners in reversed rating order,
        priorities from the highest to the lowest within every owner.

        :param queue: mapping owner -> priority -> cluster queues
        :param pool: key of the owners rating to iterate over
        :param blockers_state: if given, tasks reported as blocked by semaphore blockers are skipped
        :return: generator of tuples (is unlimited owner, priority, queue item)
        """
        semaphores = self.__state.semaphores
        blockers = self.__state.semaphore_blockers
        semaphores_by_task = self.__state.task_semaphores
        # The rating is copied to a list before the iteration starts
        rated_owners = list(common_itertools.chain(None, reversed(self.__state.owners_rating[pool])))
        for rated in rated_owners:
            if rated is None:
                owner = qstate.UNLIMITED_OWNER
            else:
                owner = self.__state.owners_rating_index[id(rated)][0]
            by_priority = queue.get(owner)
            if by_priority is None:
                continue
            for priority in sorted(by_priority, reverse=True):
                cluster_queues = by_priority.get(priority)
                if cluster_queues is None:
                    continue
                for item in qstate.MergeQueue([entry[1] for entry in cluster_queues]):
                    if blockers_state is not None:
                        task_id = item.task_id
                        task_semaphores = semaphores_by_task.get(task_id)
                        if (
                            task_id not in self.__state.tasks_acquired and
                            blockers.blocked(task_id, task_semaphores, semaphores, blockers_state)
                        ):
                            continue
                    yield owner == qstate.UNLIMITED_OWNER, priority, item

    def __can_acquire_semaphores(self, task_id, blockers_state):
        """
        Check whether the task fits into the capacity of its semaphores and record the outcome.

        :param task_id: id of the task to check
        :param blockers_state: blockers state, capacity is occupied in it on success,
            otherwise the task is registered there as blocked
        :return: True if the task already holds its semaphores or the capacity check succeeded
        """
        if task_id in self.__state.tasks_acquired:
            return True
        capacity_check = self.__state.check_semaphores_capacity(task_id)
        fitting, blocked_sem_id, required_weight, task_semaphores = capacity_check
        if not fitting:
            blockers_state.block(task_id, blocked_sem_id, required_weight)
            if task_semaphores is not None:
                blockers_state.blocked_cache[task_semaphores.acquires] = blockers_state.blockers[task_id]
            return False
        for sem_id, _, weight in fitting:
            blockers_state.occupy(sem_id, weight)
        return True

    @__rpc_method(allow_secondary=True, method_type=jserver.RPC.dupgenerator)
    def task_to_execute_it(self, host, host_info):
        """
        Iterate over queued tasks which fit the given host.

        The generator is driven with `send()`: every yielded value is a `[task_id, -score]` pair and the value
        sent back is compared with `qtypes.QueueIterationResult` members. On `NEXT_TASK` the requirements of the
        yielded task are subtracted from the locally tracked free resources of the host, on `ACCEPTED` the
        iteration stops. The final result is passed as the argument of `StopIteration` (Python 2 way to return
        a value from a generator).

        :param host: host name or None; a name is translated to its index, -1 is used for unknown names
        :param host_info: encoded host information, decoded with `qtypes.HostInfo.decode`
        :return: via `StopIteration` argument: list of semaphore blockers, metrics dict and the operation id
            of the state read when the iteration started
        """
        last_operation_id = self.__state.operation_id
        if host is not None:
            host = self.__state.hosts.index.get(host, -1)
        queue = self.__state.host_queue.get(host)
        if queue is None:
            # No queue for the host - finish immediately with empty blockers and metrics
            raise StopIteration([[], {}, last_operation_id])
        host_info = qtypes.HostInfo.decode(host_info)
        host_cap = host_info.capabilities
        free_cores, free_ram = host_info.free.cores, host_info.free.ram
        multislot = host_info.multislot
        # NOTE(review): `host_cap` is dereferenced here although it is checked for truthiness below -
        # confirm that decoded host info always carries a capabilities object
        host_disk_space = host_cap.disk_space
        # Resources matched by the tasks which the caller already asked to account (NEXT_TASK)
        all_matched_resources = {}
        blockers_state = self.__state.semaphore_blockers.State()
        skipped_due_disk_space = 0
        skipped_due_cores = 0
        skipped_due_ram = 0
        pool = (
            self.__state.quota_pools.match_pool(host_info.tags) if self.__quotas_config.use_pools else None
        )
        for unlimited, priority, queue_item in self.__task_to_execute_it(
            queue, pool, blockers_state
        ):
            score = queue_item.score
            task_id = queue_item.task_id
            task_info = self.__state.task_queue_index[task_id].task_info

            if unlimited:
                # Items of the unlimited owner are offered only if the real owner has a quota record for the pool
                # and it passes the priority dependent threshold below
                quota = self.__state.quotas.get(task_info.owner, {}).get(pool)
                if quota is None:
                    continue
                if quota[1][1] < -quota[0] * (priority - qstate.UNLIMITED_OWNER_MINIMUM_PRIORITY + 1) / 3:
                    continue

            disk_space = None
            matched_resources = {}
            requirements = task_info.requirements
            if host_cap:
                res_cap = 0
                if host_cap.resources is not None and requirements and requirements.resources:
                    # Required resources which are listed in the host capabilities
                    matched_resources = {
                        rid: requirements.resources[rid]
                        for rid in requirements.resources.viewkeys() & host_cap.resources
                    }
                    # Sum over the union of the resources matched by this task and by the accounted ones
                    res_cap = sum(it.ifilter(None, (
                        matched_resources.get(_) or all_matched_resources.get(_)
                        for _ in all_matched_resources.viewkeys() | matched_resources.viewkeys()
                    )))
                # Part of the task resources size not covered by `res_cap`, shifted right by 10 bits
                absent_res_size = max(
                    self.__state.task_res_sizes.get(task_id, 0) - res_cap, 0
                ) >> 10
                disk_space = requirements and requirements.disk_space or 0
                if host_disk_space < disk_space + absent_res_size:
                    skipped_due_disk_space += 1
                    continue

            # Tasks with requirements take at least one core, tasks without requirements take nothing
            req_cores, req_ram = (
                (requirements.cores or 1, requirements.ram)
                if requirements else
                (0, 0)
            )

            if multislot:
                if free_cores < req_cores or free_ram < req_ram:
                    skipped_due_cores += int(free_cores < req_cores)
                    skipped_due_ram += int(free_ram < req_ram)
                    continue

            task_semaphores = self.__state.task_semaphores.get(task_id)
            if task_semaphores and not self.__can_acquire_semaphores(task_id, blockers_state):
                continue

            # Offer the task and wait for the verdict of the caller
            result = yield [task_id, -score]

            if result == qtypes.QueueIterationResult.NEXT_TASK:
                # Account the resources of the offered task before looking at the next one
                if disk_space:
                    host_disk_space -= disk_space
                if multislot:
                    free_cores -= req_cores
                    free_ram -= req_ram
                all_matched_resources.update(matched_resources)
            elif result == qtypes.QueueIterationResult.ACCEPTED:
                break

        metrics = dict(
            skipped_due_disk_space=skipped_due_disk_space,
            skipped_due_cores=skipped_due_cores,
            skipped_due_ram=skipped_due_ram,
            blocked_by_semaphores=blockers_state.blocked_count,
        )
        raise StopIteration(
            [self.__state.semaphore_blockers.actual_blockers(blockers_state), metrics, last_operation_id]
        )

    @__rpc_method(method_type=jserver.RPC.dupgenerator)
    def lock(self, lock_name):
        """
        Hold a named lock for as long as the generator is alive.

        The name is added to the locks of the local state, yielded back to the caller and discarded
        when the generator is finished or closed.

        :param lock_name: name of the lock, must be of `str` type
        :raises TypeError: if the name is not a `str`
        :raises qerrors.QAcquireError: if the lock with such name is held already
        """
        if not isinstance(lock_name, str):
            raise TypeError("Lock name must be 'str' type")
        already_held = lock_name in self.__local_state.locks
        if already_held:
            raise qerrors.QAcquireError("Lock already acquired")
        try:
            self.__local_state.locks.add(lock_name)
            yield lock_name
        finally:
            # The local state is looked up again on purpose - it is not cached across the yield
            self.__local_state.locks.discard(lock_name)

    @__rpc_method(method_type=jserver.RPC.dupgenerator)
    def task_to_execute(self, host, host_info, job=None):
        """
        Hand tasks over to a host which asks for work.

        The generator is driven with `send()`: the caller sends `(task_id, task_job_id)` pairs and gets
        a `qtypes.QueueIterationResult` member yielded back for every pair. A pair with `task_id` of None ends
        the negotiation with `NO_TASKS`; after that the caller is expected to send the final triple of
        semaphore blockers, metrics and last operation id, which are applied to statistics and state.

        :param host: host name or None; a name is translated to its index, -1 is used for unknown names
        :param host_info: encoded host information, decoded with `qtypes.HostInfo.decode`
        :param job: RPC job object, its logger is used when provided
        """
        logger = job.log if job else self._logger
        host_info = qtypes.HostInfo.decode(host_info)
        self.__statistics.wants_task(host_info.tags)
        self.__statistics.total_wants += 1
        if host is not None:
            host = self.__state.hosts.index.get(host, -1)
        if host not in self.__state.host_queue:
            # Nothing is queued for the host - the generator finishes without yielding anything
            return
        self.__state.update_host_capabilities_(self, host, host_info.capabilities)

        it_result = None
        while True:
            task = None
            # Inner loop: ask the caller for candidates until one is still in the queue or there are no more
            while True:
                task_id, task_job_id = yield it_result
                if task_id is None:
                    it_result = qtypes.QueueIterationResult.NO_TASKS
                    break
                self.__statistics.task_to_execute_iterations += 1
                task = self.__state.task_queue_index.get(task_id)
                if task is None or not task.task_ref:
                    it_result = qtypes.QueueIterationResult.SKIP_TASK
                    continue
                it_result = qtypes.QueueIterationResult.ACCEPTED
                break

            if it_result == qtypes.QueueIterationResult.ACCEPTED:
                semaphores = self.__state.task_semaphores.get(task_id)
                # Remembered to release only the semaphores acquired by this very call on later conflicts
                semaphores_already_acquired = task_id in self.__state.tasks_acquired
                if semaphores:
                    if not self.__acquire_semaphores(logger, task_id):
                        self.__statistics.semaphore_conflict += 1
                        it_result = qtypes.QueueIterationResult.SKIP_TASK
                        logger.error("Task #%s cannot acquire all required semaphores", task_id)
                        continue
                # The task reference is checked once more after the semaphores were acquired
                if not task.task_ref:
                    self.__statistics.task_conflict += 1
                    if semaphores and not semaphores_already_acquired:
                        self.__release_semaphores(task_id, ctt.Status.ENQUEUED, logger)
                    it_result = qtypes.QueueIterationResult.SKIP_TASK
                    logger.error("Task #%s already popped from queue", task_id)
                    continue
                start_time = int(time.time())
                if task_job_id:
                    logger.info("Starting job %s for task #%s", task_job_id, task_id)
                    # NOTE(review): the pool is matched here regardless of `use_pools` of the quotas config,
                    # unlike in `task_to_execute_it` - confirm this is intended
                    pool = self.__state.quota_pools.match_pool(host_info.tags)
                    owner_consumption = self.__state.consumptions.get(task.task_info.owner, {}).get(pool)
                    owner_quota = self.__state.quotas.get(task.task_info.owner, {}).get(pool, (0,))[0]
                    owner_consumed_qp = (sum(owner_consumption.qp) if owner_consumption else 0)
                    # Resources to account: task requirements on multislot hosts, whole host otherwise
                    host_cap = (
                        task.task_info.requirements
                        if host_info.multislot else
                        host_info.capabilities
                    ) or qtypes.ComputingResources()
                    disk_space = task.task_info.requirements and task.task_info.requirements.disk_space or 0
                    # Disk space is accounted as SSD or HDD depending on the SSD tag of the host
                    hdd, ssd = (
                        (0, disk_space)
                        if ctc.Tag.SSD in host_info.tags else
                        (disk_space, 0)
                    )
                    # NOTE(review): requirements are passed as-is although the `host_cap` expression above
                    # expects them to be possibly falsy - confirm the helper tolerates missing requirements
                    result = self.__state.start_execution(
                        self, start_time, task_job_id, task_id,
                        self.__calculate_cpu_based_dominanta(
                            task.task_info.requirements, host_info.capabilities, host_info.multislot
                        ),
                        host_cap.ram or 0, host_cap.cores or 0, hdd, ssd, pool
                    )
                    if not result:
                        self.__statistics.job_conflict += 1
                        # `False` is handled as "the job is already started", any other falsy result
                        # as "the task is already started by another job"
                        if result is False:
                            if semaphores and not semaphores_already_acquired:
                                self.__release_semaphores(task_id, ctt.Status.ENQUEUED, logger)
                            it_result = qtypes.QueueIterationResult.SKIP_JOB
                            already_started_execution = self.__state.executions.get(task_job_id)
                            logger.error(
                                "Job %s already started for task #%s",
                                task_job_id, already_started_execution and already_started_execution[1]
                            )
                            continue
                        # Pop the task out of the queue and try to find a job which locked the task to log it
                        self.__push(task_id, None, None, logger=logger)
                        self.__statistics.task_conflict += 1
                        for jid, (_, tid) in self.__state.executions.iteritems():
                            if tid == task_id:
                                logger.error("Task #%s already started with job %x", task_id, jid)
                                break
                        else:
                            logger.error("Task #%s already started with UNKNOWN JOB", task_id)
                        it_result = qtypes.QueueIterationResult.SKIP_TASK
                        continue
                    for tag in host_info.tags:
                        self.__local_state.last_quota_remnants[tag].append((owner_consumed_qp, owner_quota))
                # The task is given to the host: drop it from the queue and from semaphores wait lists
                if semaphores:
                    self.__state.del_wanted_semaphores(self, task_id, [a.name for a in semaphores.acquires])
                self.__state.del_task(self, task_id)
                self._wait_commit(logger=logger)

                self.__statistics.got_task(host_info.tags)
                self.__statistics.total_got += 1

            if it_result == qtypes.QueueIterationResult.NO_TASKS:
                break

        # The last value sent by the caller carries the final result of the queue iteration
        semaphore_blockers, metrics, last_operation_id = yield it_result

        # Only the metrics known to the statistics are accumulated
        for metric_name in metrics.viewkeys() & qtypes.Statistics.known_counters:
            setattr(self.__statistics, metric_name, getattr(self.__statistics, metric_name) + metrics[metric_name])

        if semaphore_blockers:
            self.__state.add_semaphore_blockers(self, semaphore_blockers)

    @staticmethod
    def __calculate_cpu_based_dominanta(requirements, capabilities, multislot=False):
        """
        Calculate the dominant share of a task on a host expressed in cores scaled by `FRACTIONAL_SCALE`.

        On a multislot host the result is the maximum of the required cores and of the required RAM
        converted to cores proportionally to the host's cores/RAM ratio. On a regular host all the cores
        of the host are taken.

        Missing requirements (None) and missing `cores`/`ram` values are tolerated in multislot mode:
        at least one core and zero RAM are assumed for the task. If the host reports no RAM,
        the RAM share cannot be calculated and only cores are taken into account.

        :param requirements: task requirements with `cores` and `ram` attributes or None
        :param capabilities: host capabilities with `cores` and `ram` attributes
        :param multislot: whether the host is a multislot one
        :return: integer amount of scaled cores
        """
        scale = qstate.FRACTIONAL_SCALE
        if not multislot:
            return capabilities.cores * scale

        host_cores = capabilities.cores or 1
        # `getattr` with default makes `requirements=None` equal to requirements without values
        req_cores = getattr(requirements, "cores", None) or 1
        # RAM amounts are rounded up to the units of 1024
        host_ram = ((capabilities.ram or 0) + 1023) >> 10
        req_ram = ((getattr(requirements, "ram", None) or 0) + 1023) >> 10

        cores_share = req_cores * scale
        if not host_ram:
            # Avoid division by zero for hosts without known RAM amount
            return cores_share
        return max(cores_share, req_ram * host_cores * scale // host_ram)

    @__rpc_method(allow_secondary=True, method_type=jserver.RPC.generator)
    def queue(self):
        """
        Stream the whole task queue followed by the dictionaries required to decode it.

        Encoded non-empty queue items are yielded first, priorities going from the highest to the lowest,
        then None as a separator, then encoded hosts, owners, task types and client tags.
        """
        priorities = sorted(self.__state.task_queue, reverse=True)
        for priority in priorities:
            for queue_item in self.__state.task_queue[priority]:
                if not queue_item:
                    continue
                yield queue_item.encode()
        # Separator between the queue items and the dictionaries
        yield None
        yield self.__state.hosts.encode()
        yield self.__state.owners.encode()
        yield self.__state.task_types.encode()
        yield self.__state.client_tags.encode()

    @__rpc_method(allow_secondary=True)
    def queue_by_host(self, host, pool=None):
        """
        List encoded queue items of the given host in the order of their execution.

        :param host: host name, unknown names are translated to index -1
        :param pool: key of the owners rating used to order the items
        :return: list of encoded items, empty if there is no queue for the host
        """
        host_index = self.__state.hosts.index.get(host, -1)
        host_queue = self.__state.host_queue.get(host_index)
        if not host_queue:
            return []
        return [
            item.encode(priority=priority)
            for _, priority, item in self.__task_to_execute_it(host_queue, pool)
        ]

    @__rpc_method(allow_secondary=True)
    def queue_by_task(self, task_id):
        """
        Get the encoded queue item of the task together with the encoded hosts collection.

        :param task_id: id of the task to look for
        :return: tuple (encoded hosts, encoded queue item) or (None, None) if the task has no truthy
            item in the queue index
        """
        queue_item = self.__state.task_queue_index.get(task_id)
        if not queue_item:
            return None, None
        return self.__state.hosts.encode(), queue_item.encode()

    @__rpc_method(allow_secondary=True)
    def task_queue(self, task_id, pool=None):
        """
        Find the position of the task in the queue of every host it is enqueued to.

        :param task_id: id of the task to look for
        :param pool: key of the owners rating used to order the host queues
        :return: dict host -> (1-based position of the task, total size of the host queue)
        """
        positions = {}
        queue_item = self.__state.task_queue_index.get(task_id)
        if queue_item is None or not queue_item.task_ref:
            return positions

        # Items with a score keep hosts as bits, items without it keep pairs with a host index at position 1
        scored = queue_item.score is not None
        hosts = queue_item.hosts
        if scored:
            hosts = self.__state.hosts.indexes_from_bits(hosts)

        for host_entry in hosts:
            host = host_entry if scored else host_entry[1]
            host_queue = self.__state.host_queue.get(host)
            if not host_queue:
                continue
            queue_size = sum(
                len(cluster_queue[1])
                for by_priority in host_queue.itervalues()
                for cluster_queues in by_priority.itervalues()
                for cluster_queue in cluster_queues
            )
            ordered_items = self.__task_to_execute_it(host_queue, pool)
            for position, (_, _, item) in enumerate(ordered_items, 1):
                if item.task_id == task_id:
                    positions[self.__state.hosts[host]] = (position, queue_size)
                    break
        return positions

    @__rpc_method(allow_secondary=True)
    def validate(self):
        """
        Collect the data to validate the queue against an external source.

        :return: tuple of the queued task ids and the list of hex ids of the jobs executing in the `None` pool
            which are not among the locked jobs of the local state
        """
        queue_tids = self.__state.task_queue_index.keys()
        executing = set()
        for pool_consumption in self.__state.consumptions.itervalues():
            for pool, consumption in pool_consumption.iteritems():
                for job_id in consumption.executing_jobs:
                    # Only the consumption of the `None` pool is taken into account
                    if pool is None:
                        executing.add(uuid.UUID(bytes=job_id).hex)
        not_locked = executing - self.__local_state.locked_jobs_ids
        return queue_tids, list(not_locked)

    @__rpc_method(allow_secondary=True)
    def ping(self, value):
        """Echo `value` back to the caller unchanged."""
        echoed = value
        return echoed

    @__rpc_method(allow_secondary=True)
    def resources(self):
        """Return the `resources` attribute of the current state as is."""
        state = self.__state
        return state.resources

    @__rpc_method(method_type=jserver.RPC.full)
    def create_semaphore(self, fields, job=None):
        """
        Create a new non-auto semaphore, register it in the state and schedule its creation in the database.

        :param fields: dict of semaphore fields; "name" and "owner" are popped from it, "capacity" (default 1),
            "shared" (default empty) and "public" (default False) are read; the dict is modified in place
        :param job: RPC job object, its logger is used when provided
        :return: tuple of the new semaphore id and the encoded semaphore
        :raises KeyError: if "name" or "owner" key is absent
        :raises ValueError: if name or owner is empty, the name is already taken or capacity is negative
        """
        logger = job.log if job else self._logger
        name = fields.pop("name")
        owner = fields.pop("owner")
        if not name:
            raise ValueError("name is required")
        if not owner:
            raise ValueError("owner is required")
        if name in self.__state.semaphore_index:
            # The message is formatted explicitly: exceptions do not interpolate their arguments
            raise ValueError(
                "Semaphore with name '{}' already exists: #{}".format(name, self.__state.semaphore_index.get(name))
            )
        # All the input is validated before a new id is taken, so invalid requests do not consume ids
        capacity = fields["capacity"] = int(fields.get("capacity", 1))
        if capacity < 0:
            raise ValueError("Field 'capacity' must be non negative integer")
        sem_id = self.__state.next_id(self, "semaphore")
        fields["auto"] = False
        shared = fields.get("shared", ())
        public = bool(fields.get("public", False))
        self.__state.add_semaphore(self, sem_id, name, owner, capacity, False, shared, public)
        self.__state.push_db_operation_(
            self, qtypes.DBOperations.CreateSemaphore(sem_id, name, owner, **fields), logger=logger
        )
        self._wait_commit(logger=logger)
        return sem_id, self.__state.semaphores[sem_id].encode()

    @__rpc_method(method_type=jserver.RPC.full)
    def set_api_quota(self, login, api_quota, job=None):
        """
        Set API quota of the user and refresh the quota kept next to the user's current consumption.

        :param login: login of the user
        :param api_quota: new quota, None means the default API quota from the config
        :param job: RPC job object, its logger is used when provided
        :return: the effective quota (the default one if None was passed)
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        # The value is stored exactly as passed, None included
        self.__state.set_api_quota(self, login, api_quota)
        self.__state.push_db_operation_(self, qtypes.DBOperations.SetApiQuota(login, api_quota), logger=logger)
        self._wait_commit(logger=logger)

        effective_quota = api_quota
        if effective_quota is None:
            effective_quota = self.__api_quotas_config.api.default_quota
        consumption = self.__local_state.complex_api_consumption.api_consumption.consumption
        if login in consumption:
            # Keep the first item of the record and replace the quota only
            consumption[login] = (consumption[login][0], effective_quota)
        return effective_quota

    @__rpc_method(method_type=jserver.RPC.full)
    def set_web_api_quota(self, api_quota, job=None):
        """
        Set web API quota and refresh the quota kept next to the current consumption of every user.

        :param api_quota: new quota, None means the default web API quota from the config
        :param job: RPC job object, its logger is used when provided
        :return: the effective quota (the default one if None was passed)
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        self.__state.set_web_api_quota(self, api_quota)
        self._wait_commit(logger=logger)

        effective_quota = api_quota
        if effective_quota is None:
            effective_quota = self.__api_quotas_config.web.default_quota
        consumption = self.__local_state.complex_api_consumption.web_api_consumption.consumption
        # Only values of existing keys are replaced, so the mapping size does not change during the iteration
        for login in consumption.iterkeys():
            consumption[login] = (consumption[login][0], effective_quota)
        return effective_quota

    @__rpc_method()
    def get_api_quota(self, login):
        """
        Get API quota of the user.

        :param login: login of the user
        :return: quota stored in the state or the default API quota from the config
        """
        default_quota = self.__api_quotas_config.api.default_quota
        return self.__state.api_quotas.get(login, default_quota)

    @__rpc_method()
    def get_web_api_quota(self):
        """
        Get web API quota.

        :return: quota stored in the state or `qtypes.ComplexApiConsumption.WEB_DEFAULT_QUOTA` if it is not set
        """
        stored_quota = self.__state.web_api_quota
        if stored_quota is None:
            return qtypes.ComplexApiConsumption.WEB_DEFAULT_QUOTA
        return stored_quota

    @__rpc_method()
    def get_api_quotas_table(self):
        """
        Get msgpack-serialized table of API consumption.

        The serialized table is cached and rebuilt only if more than 9 seconds passed since the last rebuild.

        :return: serialized consumption mapping of the local state
        """
        now = time.time()
        cache_is_fresh = now - self.__api_quota_last_update <= 9
        if not cache_is_fresh:
            consumption = self.__local_state.complex_api_consumption.api_consumption.consumption
            self.__serialized_api_quota = msgpack.dumps(consumption, use_bin_type=True)
            self.__api_quota_last_update = now
        return self.__serialized_api_quota

    @__rpc_method()
    def get_api_consumption(self, login):
        """
        Get the API consumption record of the user.

        :param login: login of the user
        :return: record from the local state or None if there is no record for the login
        """
        consumption = self.__local_state.complex_api_consumption.api_consumption.consumption
        return consumption.get(login)

    @__rpc_method()
    def get_web_api_consumption(self, login):
        """
        Get the web API consumption record of the user.

        :param login: login of the user
        :return: record from the local state or None if there is no record for the login
        """
        consumption = self.__local_state.complex_api_consumption.web_api_consumption.consumption
        return consumption.get(login)

    @__rpc_method(method_type=jserver.RPC.full)
    def update_semaphore(self, sem_id, fields, job=None):
        """
        Update an existing semaphore in the state and schedule the update in the database.

        :param sem_id: id of the semaphore to update
        :param fields: dict of fields to update: "owner", "capacity", "auto", "shared", "public"
        :param job: RPC job object, its logger is used when provided
        :return: encoded semaphore
        :raises ValueError: if capacity is negative
        :raises TypeError: if "auto" is neither bool nor None
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        sem = self.__state.semaphores.get(sem_id)
        if sem is None:
            # NOTE(review): `mapping` is not among the imports visible at the top of the file - confirm
            raise mapping.DoesNotExist("Semaphore #{} not found".format(sem_id))

        # Absent fields fall back to the current values of the semaphore
        capacity = int(fields.get("capacity", sem.capacity))
        if capacity < 0:
            raise ValueError("Field 'capacity' must be non negative integer")
        auto = fields.get("auto", sem.auto)
        if auto is not None and not isinstance(auto, bool):
            raise TypeError("Field 'auto' must be of bool type")

        owner = fields.get("owner")
        shared = fields.get("shared")
        public = fields.get("public")
        self.__state.update_semaphore(self, sem_id, owner, capacity, auto, shared, public)
        self.__state.push_db_operation_(self, qtypes.DBOperations.UpdateSemaphore(sem_id, **fields), logger=logger)
        self._wait_commit(logger=logger)
        return sem.encode()

    @__rpc_method(method_type=jserver.RPC.full)
    def delete_semaphore(self, sem_id, job=None):
        """
        Delete a semaphore which is neither acquired nor wanted by any task.

        :param sem_id: id of the semaphore to delete
        :param job: RPC job object, its logger is used when provided
        :return: True if the semaphore was deleted, False if it is in use
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        sem = self.__state.semaphores.get(sem_id)
        if sem is None:
            # NOTE(review): `mapping` is not among the imports visible at the top of the file - confirm
            raise mapping.DoesNotExist("Semaphore #{} not found".format(sem_id))
        in_use = sem.value or self.__state.wants_semaphore.get(sem_id)
        if in_use:
            return False
        self.__state.remove_semaphore(self, sem_id)
        self.__state.push_db_operation_(self, qtypes.DBOperations.DeleteSemaphore(sem_id), logger=logger)
        self._wait_commit(logger=logger)
        return True

    def __release_semaphores(self, task_id, status, logger, wait_commit=True):
        """
        Release semaphores of the task and schedule the task update in the database.

        :param task_id: id of the task to release semaphores of
        :param status: task status, used for logging only
        :param logger: logger to report the progress to
        :param wait_commit: whether to wait for the commit of the operation
        :return: True if semaphores were released, False on conflict
        """
        if status:
            status_note = "in status {}".format(status)
        else:
            status_note = "despite the status"
        logger.info("Releasing semaphores for task #%s %s", task_id, status_note)

        result = self.__state.release_semaphores(self, task_id)
        if not result:
            logger.warning("Conflict occurred while releasing semaphores for task #%s", task_id)
            return False

        # The second item of the result is not needed here
        released, _ = result
        logger.info("Semaphores %r for task #%s successfully released", released, task_id)
        self.__state.push_db_operation_(self, qtypes.DBOperations.UpdateTask(task_id, False), logger=logger)
        if wait_commit:
            self._wait_commit(logger=logger)
        return True

    @__rpc_method(method_type=jserver.RPC.full)
    def release_semaphores(self, task_id, prev_status, status, job=None):
        """
        Release semaphores acquired by the task if the status transition requires it

        :param task_id: id of task that acquired semaphores
        :param prev_status: status of the task before the transition
        :param status: status of the task after the transition, None forces releasing despite the status
        :param job: RPC job object
        :return: True if semaphores were released
        """
        if task_id not in self.__state.tasks_acquired:
            return False
        if job:
            logger = job.log
        else:
            logger = self._logger
        semaphores = self.__state.task_semaphores.get(task_id)
        if not semaphores:
            return False
        if status is not None:
            # Re-enqueueing of an enqueued task keeps the semaphores
            if prev_status == status and status == ctt.Status.ENQUEUED:
                return False
            # Semaphores are released only in the statuses they are configured to be released in
            if status not in ctt.Status.Group.expand(semaphores.release):
                return False
        return self.__release_semaphores(task_id, status, logger)

    @__rpc_method()
    def semaphore_values(self, sem_ids):
        """
        Get current values of the given semaphores.

        :param sem_ids: iterable of semaphore ids
        :return: list with the value of every semaphore; for an unknown id the falsy lookup result is kept
        """
        values = []
        for sem_id in sem_ids:
            sem = self.__state.semaphores.get(sem_id)
            values.append(sem and sem.value)
        return values

    @__rpc_method()
    def semaphore_tasks(self, sem_id):
        """
        Get items of the `tasks` mapping of the semaphore.

        :param sem_id: id of the semaphore
        :return: items of the semaphore's tasks or an empty list if there is no such semaphore
        """
        sem = self.__state.semaphores.get(sem_id)
        if not sem:
            return []
        return sem.tasks.items()

    @__rpc_method()
    def semaphore_group(self, name):
        """
        Get the semaphore group by its name with every part of the group sorted.

        :param name: name of the group
        :return: list of sorted parts of the group or a pair of empty tuples if the group is absent or empty
        """
        group = self.__state.semaphore_groups.get(name)
        if not group:
            return (), ()
        return [sorted(part) for part in group]

    def __read_snapshot(self, f):
        """
        Read a snapshot file chunk by chunk and iterate over the objects unpacked from it.

        The progress is logged every time the percentage of the read data grows.

        :param f: opened snapshot file
        :return: generator of unpacked objects
        """
        unpacker = common_data.msgpack_unpacker()
        total_size = os.fstat(f.fileno()).st_size
        bytes_read = 0
        logged_percent = None
        for chunk in self.__read_fd(f.fileno()):
            bytes_read += len(chunk)
            percent = bytes_read * 100 / total_size
            progress_changed = logged_percent is None or percent > logged_percent or bytes_read == total_size
            if progress_changed:
                self._logger.info("Reading snapshot: %d / %d bytes [%d%%]", bytes_read, total_size, percent)
                logged_percent = percent
            unpacker.feed(chunk)
            # Hand over everything which is completely unpacked so far
            for unpacked in unpacker:
                yield unpacked

    def load_snapshot(self, operation_id_ready=None):
        """
        Restore the state from the local snapshot and apply operations of the local journal on top of it.

        Any error is logged and counted in statistics, it is not propagated to the caller.

        :param operation_id_ready: optional event-like object, its `set()` is called as soon as the snapshot
            header and the journal are inspected (also on failure)
        """
        snapshot_path, journal_path = self.__state_path_to_load
        self._logger.info("Loading local snapshot from %s and journal from %s", snapshot_path, journal_path)
        # noinspection PyBroadException
        try:
            data = None
            if os.path.exists(snapshot_path):
                with open(snapshot_path) as f:
                    # Leading integer objects of the snapshot are operation ids,
                    # the first non-integer object is kept in `data` to be decoded as the state below
                    for data in self.__read_snapshot(f):
                        if not isinstance(data, (int, long)):
                            break
                        self.__state.operation_id = data
                        self._logger.info("Last operation of the snapshot: #%s", self.__state.operation_id)
                    assert data is not None
            else:
                self._logger.warning("Local snapshot not found")

            # finding/creating journal
            self.__journal = qjournal.OperationJournal(journal_path, logger=self._logger)
            if not self.__journal.newly_created:
                op = next(iter(self.__journal), None)
                first_operation_id = op.operation_id if op else 0
                self._logger.info(
                    "Journal exists and contains %s operations #%s..%s",
                    self.__journal.counter, first_operation_id, self.__journal.operation_id
                )
                # The journal is usable only if it continues right after the last operation of the snapshot
                if self.__state.operation_id and first_operation_id != self.__state.operation_id + 1:
                    self._logger.warning(
                        "Found gap #%s..%s, replacing journal",
                        self.__state.operation_id, first_operation_id
                    )
                    self.__journal = qjournal.OperationJournal(journal_path, force_create=True, logger=self._logger)

            if operation_id_ready is not None:
                operation_id_ready.set()

            self._logger.info("Restoring state...")
            with common_context.disabled_gc():
                state = self.__state
                if data:
                    state = qstate.PersistentState.decode(data, logger=self._logger)
                # applying operations from journal
                for operation in self.__journal:
                    # Without a snapshot the operation id is derived from the first journal operation
                    if not self.__state.operation_id:
                        self.__state.operation_id = operation.operation_id - 1
                    method_name = state.operations[operation.method].__name__
                    self._logger.info("Applying journal operation #%s [%s]", operation.operation_id, method_name)
                    state.apply(self, *operation)
            # The restored state replaces the current one unless the server is in SECONDARY status
            if self._status != qtypes.Status.SECONDARY:
                self.__state = state
                self._logger.info("State restored (#%s)", self.__state.operation_id)
            else:
                self._logger.warning("No need to restore state from local snapshot - got state from PRIMARY")
            self.__snapshot_restored = True
        except Exception:
            # Waiters of the event must not be left hanging if the loading failed before the event was set
            if operation_id_ready is not None:
                operation_id_ready.set()
            self.__statistics.error += 1
            self._logger.exception("Error while loading snapshot")

    def snapshot_dumper(self):
        """
        Periodically dump a snapshot while the server is running.

        Wakes up every second; a dump is started only when `__dump_snapshot_interval` seconds have passed
        since the previous attempt and the node is either PRIMARY or SECONDARY.
        Errors raised by `dump_snapshot` are logged and counted, the loop keeps running.
        """
        previous_attempt = time.time()
        log = self._logger.getChild(self.snapshot_dumper.__name__)
        log.info("Started with period of %ss", self.__dump_snapshot_interval)
        while self.__running:
            gevent.sleep(1)
            current = time.time()
            if current - previous_attempt < self.__dump_snapshot_interval:
                continue
            if self._status not in (qtypes.Status.SECONDARY, qtypes.Status.PRIMARY):
                continue
            previous_attempt = current
            # noinspection PyBroadException
            try:
                self.dump_snapshot(log)
            except Exception:
                self.__statistics.error += 1
                log.exception("Error while dumping snapshot")

    def __rotate_snapshot_backups(self, snapshot_path):
        """
        Shift numbered snapshot backups by one and hard-link the current snapshot as backup `.1`.

        Existing `<path>.N` files are renamed to `<path>.N+1`, starting from the highest number,
        for `N` in `SNAPSHOT_BACKUPS - 1 .. 1`. Nothing is done if `SNAPSHOT_BACKUPS` is less than 2.

        :param snapshot_path: path to the current snapshot file
        """
        backups = self.SNAPSHOT_BACKUPS
        for number in range(backups - 1, 0, -1):
            newer = "{}.{}".format(snapshot_path, number)
            older = "{}.{}".format(snapshot_path, number + 1)
            if os.path.exists(newer):
                os.rename(newer, older)
        # the loop above ran at least once only if there is room for at least one backup
        if backups > 1 and os.path.exists(snapshot_path):
            os.link(snapshot_path, "{}.{}".format(snapshot_path, 1))

    def add_to_journal(self, operation):
        """
        Append an operation to the current journal, if there is one.

        While a new journal is being prepared (`__new_journal` is set), the record returned by the
        current journal is additionally appended to it as raw data.

        :param operation: operation to store, its `operation_id` is used as the record id
        """
        journal = self.__journal
        if not journal:
            return
        record = journal.add(operation, operation.operation_id)
        pending_journal = self.__new_journal
        if pending_journal:
            pending_journal.add(record, operation.operation_id, raw=True)

    def dump_snapshot(self, logger=None):
        """
        Dump the current state to the snapshot file and switch to a fresh journal.

        The snapshot is written by a forked child process, so the parent keeps serving requests.
        The child redirects its stderr to a pipe; anything read from that pipe by the parent is
        treated as a failure report. Operations journaled while the child is running are duplicated
        to a temporary journal (see `add_to_journal`), which replaces the current one on success.

        :param logger: logger to use, `self._logger` if omitted
        :return: `True` if the child reported no errors, `False` otherwise
        """
        if logger is None:
            logger = self._logger
        logger.info("Started")
        journal_path = self.__journal_path_to_save
        journal_path_tmp = journal_path + "~"
        self.__new_journal = qjournal.OperationJournal(journal_path_tmp, force_create=True, logger=logger)
        # assume this block of code is executed atomically from the gevent's point of view
        # {
        r, w = os.pipe()
        pid = os.fork()
        # }
        if not pid:
            # child process: it must always leave via `os._exit` and never return to the caller
            # noinspection PyBroadException
            try:
                os.close(r)
                os.dup2(w, sys.stderr.fileno())
                os.close(w)
                snapshot_path_tmp = self.__snapshot_path_to_save + "~"
                # the snapshot is binary (msgpack) data
                with open(snapshot_path_tmp, "wb") as f:
                    self.__write_snapshot(f.fileno())
                self.__rotate_snapshot_backups(self.__snapshot_path_to_save)
                os.rename(snapshot_path_tmp, self.__snapshot_path_to_save)
            except:
                print(tb.format_exc(), file=sys.stderr, end="")
                os._exit(1)
            os._exit(0)
        try:
            os.close(w)
            buf = io.BytesIO()
            # reading ends on EOF, i.e. when the child has closed its stderr (normally on exit)
            for chunk in self.__read_fd(r):
                buf.write(chunk)
            stderr = buf.getvalue()
        finally:
            # the read end was previously left open, leaking one descriptor per dump
            os.close(r)
        if stderr:
            logger.error("Failed: %s", stderr)
        else:
            logger.info("Finished successfully")
            os.rename(journal_path_tmp, journal_path)
            self.__journal, self.__new_journal = self.__new_journal, None
        return not stderr

    def __write_snapshot(self, w):
        """
        Serialize the state with msgpack and write it to a file descriptor.

        The last operation id is written first, then the encoded state follows in chunks of
        `__snapshot_chunk_size`. The process title is updated to reflect the progress.

        :param w: file descriptor to write to
        """
        set_title = setproctitle.setproctitle
        set_title("{} Making snapshot: collecting garbage".format(self.PROC_PREFIX))
        self.__collect_garbage()
        set_title("{} Making snapshot: packing".format(self.PROC_PREFIX))
        with common_context.disabled_gc():
            def encode_custom(obj):
                # objects msgpack does not know are encoded via their own `encode()` if they support it
                return obj.encode() if isinstance(obj, qtypes.Serializable) else obj

            packer = msgpack.Packer(default=encode_custom)
            os.write(w, packer.pack(self.__state.operation_id))
            payload = packer.pack(self.__state.encode())
            payload_size = len(payload)
            done = 0
            last_percent = None
            for piece in common_itertools.chunker(payload, self.__snapshot_chunk_size):
                os.write(w, piece)
                done += len(piece)
                percent = done * 100 / payload_size
                # touch the process title only when the reported percentage has grown
                if last_percent is None or percent > last_percent:
                    set_title("{} Making snapshot: writing {}%".format(self.PROC_PREFIX, percent))
                last_percent = percent

    def __read_fd(self, r):
        """
        Cooperatively read a file descriptor until EOF.

        The descriptor is switched to non-blocking mode; it is not closed here.

        :param r: file descriptor to read from
        :return: generator of data chunks of at most `__snapshot_chunk_size` bytes
        """
        gevent.os.make_nonblocking(r)
        chunk = gevent.os.nb_read(r, self.__snapshot_chunk_size)
        while chunk:
            yield chunk
            chunk = gevent.os.nb_read(r, self.__snapshot_chunk_size)

    def __make_snapshot(self):
        """
        Serialize the current state in a forked child process and collect the result via a pipe.

        :return: serialized snapshot data read from the child
        """
        # assume this block of code is executed atomically from the gevent's point of view
        # {
        r, w = os.pipe()
        pid = os.fork()
        # }
        if not pid:
            # child process: it must always leave via `os._exit`, otherwise an exception raised
            # while writing would propagate to the caller and a clone of the server would keep running
            exit_code = 1
            # noinspection PyBroadException
            try:
                os.close(r)
                self.__write_snapshot(w)
                os.close(w)
                exit_code = 0
            except:
                print(tb.format_exc(), file=sys.stderr, end="")
            finally:
                os._exit(exit_code)
        try:
            os.close(w)
            buf = io.BytesIO()
            # reading ends on EOF, i.e. when the child has closed its end of the pipe
            for chunk in self.__read_fd(r):
                buf.write(chunk)
            return buf.getvalue()
        finally:
            try:
                os.kill(pid, signal.SIGKILL)
            except OSError:
                pass
            finally:
                # the read end was previously left open, leaking one descriptor per snapshot
                os.close(r)

    def api_quotas_updater(self):
        """Rotate the API consumption table once a second, forever."""
        log = self._logger
        log.info("Start api quota updater.")
        while True:
            log.info("Start api quota table rotation.")
            # looked up on every pass: the consumption object may be replaced at runtime
            consumption = self.__local_state.complex_api_consumption
            consumption.rotate_table()
            gevent.sleep(1)

    def resource_locks_cleaner(self):
        """Ask the state to clean resource locks every 600 seconds, forever."""
        log = self._logger
        log.info("Start resource locks cleaner.")
        while True:
            log.info("Start clean resource locks.")
            now = int(time.time())
            self.__state.clean_resource_locks(self, now)
            gevent.sleep(600)

    def __next_snapshot_id(self, operation_index=None):
        """
        Allocate a new snapshot id and register a follower oplog for it.

        :param operation_index: index in the common oplog to copy operations from;
            the new oplog starts empty if omitted
        :return: the allocated snapshot id
        """
        local_state = self.__local_state
        local_state.snapshot_id += 1
        snapshot_id = local_state.snapshot_id
        if operation_index is None:
            pending = collections.deque(())
        else:
            pending = collections.deque(it.islice(self.__state.common_oplog, operation_index, None))
        self.__state.oplogs[snapshot_id] = self.__state.FollowerOplog((pending, time.time()))
        return snapshot_id

    @property
    def snapshot_operation_id(self):
        """Operation id of the current in-memory snapshot, `qstate.inf` if there is no snapshot."""
        current = self.__local_state.snapshot
        if current:
            return current.operation_id
        return qstate.inf

    @__rpc_method(method_type=jserver.RPC.generator)
    def snapshot(self, operation_id=None, operations_checksum=None, job=None):
        """
        Register a follower oplog and, if needed, stream a snapshot of the state.

        The first yielded item is either `(snapshot_id, None)`, when the caller's position
        (`operation_id` with a matching `operations_checksum`) is still covered and no snapshot data
        has to be sent, or `(snapshot_id, snapshot_size)` followed by the snapshot data split
        into chunks of `__snapshot_chunk_size`.

        :param operation_id: last operation id known to the caller
        :param operations_checksum: caller's checksum for that operation
        :param job: RPC job, its logger is used if provided
        :raises qerrors.QSnapshotNotReady: a snapshot is registered but its data is not available yet
        """
        logger = job.log if job else self._logger
        if operation_id:
            if operation_id > self.__state.operation_id:
                # the caller is ahead of this node: fall through to a full snapshot
                logger.warning(
                    "Snapshot is requested for wrong last operation id: %s > %s",
                    operation_id, self.__state.operation_id
                )
            elif operation_id == self.__state.operation_id:
                if operations_checksum != self.__state.operations_checksum:
                    logger.warning(
                        "Wrong checksum for operation #%s: %s, expected %s",
                        operation_id, operations_checksum, self.__state.operations_checksum
                    )
                else:
                    # the caller is fully up to date: start it with an empty oplog
                    logger.info("No need to return snapshot")
                    snapshot_id = self.__next_snapshot_id()
                    yield snapshot_id, None
                    return
            elif self.__state.common_oplog and operation_id >= self.__state.common_oplog[0].operation_id - 1:
                # the caller's position is within the common oplog (or right before its first entry)
                operation_index = operation_id - self.__state.common_oplog[0].operation_id
                if operation_index < 0:
                    checksum = self.__state.common_oplog[0].prev_checksum
                else:
                    checksum = self.__state.common_oplog[operation_index].checksum
                if operations_checksum != checksum:
                    logger.warning(
                        "Wrong checksum for operation #%s: %s, expected %s",
                        operation_id, operations_checksum, checksum
                    )
                else:
                    # the new oplog starts from the operation following the caller's one
                    logger.info("No need to return snapshot")
                    snapshot_id = self.__next_snapshot_id(operation_index + 1)
                    yield snapshot_id, None
                    return
        snapshot = self.__local_state.snapshot
        # a snapshot without data is the placeholder set below while the data is being produced
        if snapshot and snapshot.data is None:
            raise qerrors.QSnapshotNotReady
        if snapshot is None:
            logger.info("Creating new snapshot")
            with common_context.Timer() as timer:
                self.__cleanup_oplog()
                # `__cleanup_oplog` does nothing while this flag is unset
                self.__cleanup_oplog_enabled = False
                try:
                    # noinspection PyArgumentList
                    operation_id = self.__state.operation_id
                    self.__local_state.snapshot = self.__state.Snapshot(None, operation_id)
                    snapshot_id = self.__next_snapshot_id()
                    data = self.__make_snapshot()
                    snapshot = self.__local_state.snapshot = self.__state.Snapshot(data, operation_id=operation_id)
                    self.__local_state.start_snapshot_watchdog()
                except BaseException:
                    # drop the placeholder, so the next request tries to create the snapshot again
                    self.__local_state.snapshot = None
                    raise
                finally:
                    self.__cleanup_oplog_enabled = True
            logger.info("New snapshot created in %s", timer)
        else:
            # reuse the existing snapshot: the oplog starts from the operation following the snapshot's one
            snapshot_id = self.__next_snapshot_id(
                snapshot.operation_id - self.__state.common_oplog[0].operation_id + 1
                if self.__state.common_oplog else
                0
            )
        snapshot_size = len(snapshot.data)
        with self.__simultaneous_snapshots:
            logger.info("Snapshot #%s is ready to send (%s)", snapshot_id, common_format.size2str(snapshot_size))
            yield snapshot_id, snapshot_size
            size = 0
            for chunk in common_itertools.chunker(snapshot.data, self.__snapshot_chunk_size):
                size += len(chunk)
                logger.info(
                    "Sending snapshot #%s: %d / %d bytes [%.2f%%]",
                    snapshot_id, size, snapshot_size, size * 100. / snapshot_size
                )
                yield chunk
            logger.info("Snapshot #%s sent (%s)", snapshot_id, common_format.size2str(size))

    def __cleanup_oplog(self):
        """
        Drop follower oplogs that were not updated for `__oplog_ttl` seconds and reset an outdated snapshot.

        Does nothing while `__cleanup_oplog_enabled` is unset. The in-memory snapshot is reset when
        it lags behind the oldest operation still kept in any oplog, or when it is exactly at that
        position and the common oplog has grown beyond `qstate.MAX_OPLOG_SIZE`.
        """
        if not self.__cleanup_oplog_enabled:
            return
        deadtime = time.time() - self.__oplog_ttl
        min_operation_id = self.__state.common_oplog[0].operation_id if self.__state.common_oplog else None
        # iterate over a copy: entries are deleted from the mapping inside the loop, which is
        # an error for a live dictionary view
        for snapshot_id, (oplog, update_time) in list(self.__state.oplogs.items()):
            if update_time < deadtime:
                del self.__state.oplogs[snapshot_id]
            elif oplog:
                operation_id = oplog[0].operation_id
                if min_operation_id is None or operation_id < min_operation_id:
                    min_operation_id = operation_id

        if self.__local_state.snapshot is None or not self.__state.common_oplog:
            return
        # the common oplog is not empty here, so `min_operation_id` is set
        snapshot_lag = self.__local_state.snapshot.operation_id + 1 - min_operation_id
        if snapshot_lag < 0 or snapshot_lag == 0 and len(self.__state.common_oplog) > qstate.MAX_OPLOG_SIZE:
            self._logger.info("Resetting outdated snapshot")
            self.__local_state.snapshot = None

    @__rpc_method(method_type=jserver.RPC.dupgenerator, account=False)
    def oplog(self, snapshot_id, node_addr):
        """
        Stream operations from the follower oplog registered for a snapshot id.

        Yields `(current operation id, chunk of operations)`; the value sent back into the generator
        is recorded as the operation id replicated by `node_addr`. When the generator finishes for
        any reason, the follower oplog and the node's replication record are dropped.

        :param snapshot_id: id of the follower oplog to stream
        :param node_addr: address of the consuming node; a non-empty address is added to contenders
        :raises qerrors.QOutdated: there is no oplog registered for `snapshot_id`
        """
        try:
            self.__local_state.stop_snapshot_watchdog()
            if node_addr and node_addr not in self.__contenders:
                self.__contenders.append(node_addr)
                if self._contender:
                    self._contender.contenders = self.__contenders
            while self.__replicating:
                oplog = self.__state.oplogs.get(snapshot_id)
                if oplog is None:
                    raise qerrors.QOutdated(
                        "Oplog index for {} (snapshot #{}) is outdated".format(node_addr, snapshot_id)
                    )
                # refresh the timestamp `__cleanup_oplog` compares with the TTL
                oplog.update_time = time.time()
                if not oplog.oplog:
                    # nothing to send yet
                    gevent.sleep(.01)
                    continue
                self.__cleanup_oplog()
                chunk = [oplog.oplog.popleft() for _ in six.moves.range(min(self.__oplog_chunk_size, len(oplog.oplog)))]
                # the consumer sends back the id of the operation it has committed
                committed_operation_id = yield self.__state.operation_id, chunk
                self.__replicated_operation_ids[node_addr] = committed_operation_id
                # signal the current event, then replace it with a fresh one
                self.__replicated_operation_ids_changed.set()
                self.__replicated_operation_ids_changed = gevent.event.Event()
        finally:
            self._logger.warning("Dropping oplog for snapshot #%s", snapshot_id)
            self.__state.oplogs.pop(snapshot_id, None)
            self.__cleanup_oplog()
            self.__replicated_operation_ids.pop(node_addr, None)

    @__rpc_method(allow_secondary=True)
    def status(self):
        """Return the current status of this node."""
        current_status = self._status
        return current_status

    @__rpc_method(method_type=jserver.RPC.full)
    def execution_completed(self, job_id, job=None):
        """
        Finish the execution of a job and account the finished executions in the statistics.

        :param job_id: id of the job to finish
        :param job: RPC job, its logger is used if provided
        :return: list of finished executions with their consumption downscaled
        """
        log = self._logger if not job else job.log
        log.info("Finishing job %s", job_id)

        finished = self.__state.finish_execution(self, int(time.time()), job_id)
        for item in finished:
            self.__statistics.done_task(item)

        return [
            item._replace(consumption=qstate.quota_downscale(item.consumption))
            for item in finished
        ]

    def __pool_index(self, pool):
        """
        Translate a quota pool name to its index.

        :param pool: pool name or `None`
        :return: index of the pool, `None` if no pool is given
        :raises AssertionError: the pool is not registered
        """
        if pool is None:
            return None
        index = self.__state.quota_pools.pools.index.get(pool)
        assert index is not None, "unregistered pool {}".format(pool)
        return index

    @__rpc_method(allow_secondary=True, method_type=jserver.RPC.generator)
    def current_consumptions(self, pool=None):
        """
        Yield `(owner, downscaled consumption values)` for every owner with a consumption in the pool.

        :param pool: pool name, `None` for consumptions stored without a pool
        """
        pool_index = self.__pool_index(pool)
        for owner_index, by_pool in self.__state.consumptions.items():
            pool_consumption = by_pool.get(pool_index)
            if pool_consumption is not None:
                yield (
                    self.__state.owners[owner_index],
                    [qstate.quota_downscale(value) for value in pool_consumption.qp]
                )

    @__rpc_method(allow_secondary=True, method_type=jserver.RPC.generator)
    def recalculate_consumptions(self, pool=None):
        """
        Yield `(owner, downscaled recalculated values)` for every owner with a consumption in the pool.

        :param pool: pool name, `None` for consumptions stored without a pool
        """
        pool_index = self.__pool_index(pool)
        for owner_index, by_pool in self.__state.consumptions.items():
            pool_consumption = by_pool.get(pool_index)
            if pool_consumption is not None:
                yield (
                    self.__state.owners[owner_index],
                    [qstate.quota_downscale(value) for value in pool_consumption.recalculate()]
                )

    @__rpc_method(allow_secondary=True, method_type=jserver.RPC.generator)
    def dump_consumptions(self, pool=None):
        """
        Yield `(owner, encoded consumption)` for every owner with a consumption in the pool.

        :param pool: pool name, `None` for consumptions stored without a pool
        """
        pool_index = self.__pool_index(pool)
        for owner_index, by_pool in self.__state.consumptions.items():
            pool_consumption = by_pool.get(pool_index)
            if pool_consumption is not None:
                yield self.__state.owners[owner_index], pool_consumption.encode()

    @__rpc_method(method_type=jserver.RPC.full)
    def reset_consumptions(self, job=None):
        """
        Clear executing task ids, consumptions and executions of the state.

        :param job: RPC job, its logger is used if provided
        """
        log = self._logger if not job else job.log
        log.warning("RESETTING CONSUMPTIONS")
        state = self.__state
        for container in (state.executing_tids, state.consumptions, state.executions):
            container.clear()

    @__rpc_method(method_type=jserver.RPC.full)
    def calculate_consumptions(self, job=None):
        """
        Run the consumption calculation for the current moment.

        :param job: RPC job, its logger is used if provided
        """
        log = self._logger if not job else job.log
        now = int(time.time())
        self.__calculate_consumption(now, logger=log)

    @__rpc_method(method_type=jserver.RPC.full)
    def set_quota(self, owner, quota, pool=None, use_cores=False, job=None):
        """
        Set or reset the quota of an owner and wait for the change to be committed.

        :param owner: owner name, it is registered if not known yet
        :param quota: new quota value, `None` resets the quota to the pool's default
        :param pool: pool name, `None` for the quota outside of pools
        :param use_cores: passed to `qstate.quota_upscale`
        :param job: RPC job, its logger is used if provided
        :raises ValueError: resetting the quota of a child group or setting the quota of a parent group
        """
        log = self._logger if not job else job.log
        log.info("Setting quota to %s for owner %s in pool %s", quota, owner, pool)
        owner_index = self.__state.add_task_owner_(self, owner)
        pool_index = self.__pool_index(pool)
        is_default = quota is None
        if is_default and owner_index in self.__state.owner_parents:
            raise ValueError("Cannot reset quota for child group")
        if owner_index in self.__state.owner_parents.viewvalues():
            raise ValueError("Cannot set quota for parent group")
        # a DB operation is pushed only for the quota outside of pools, with the value as requested
        if pool_index is None:
            self.__state.push_db_operation_(self, qtypes.DBOperations.SetQuota(owner, quota), logger=log)
        if is_default:
            quota = qstate.quota_downscale(self.__state.quota_pools.default(pool_index))
        upscaled = qstate.quota_upscale(quota, use_cores=use_cores)
        self.__state.set_quota(self, owner_index, upscaled, pool_index, is_default)
        self._wait_commit(logger=log)

    @__rpc_method(method_type=jserver.RPC.full)
    def reset_api_consumption(self, job=None):
        """
        Replace the API consumption data with a newly created `qtypes.ComplexApiConsumption`.

        :param job: RPC job, its logger is used if provided
        """
        log = self._logger if not job else job.log
        log.info("Clean api consumption")
        fresh_consumption = qtypes.ComplexApiConsumption()
        self.__local_state.complex_api_consumption = fresh_consumption

    @__rpc_method()
    def quota(self, owner, pool=None, use_cores=False, return_defaults=True):
        """
        Get the downscaled quota of an owner.

        :param owner: owner name
        :param pool: pool name, `None` for the quota outside of pools
        :param use_cores: passed to `qstate.quota_downscale`
        :param return_defaults: if unset, `None` is returned instead of a default quota
        :return: quota value, `None` for an unknown owner or a suppressed default
        """
        owner_index = self.__state.owners.index.get(owner)
        if owner_index is None:
            return None
        pool_index = self.__pool_index(pool)
        entry = self.__state.quotas[owner_index].get(pool_index)
        if entry is None:
            # nothing is stored for the owner: only the pool's default can be reported
            if not return_defaults:
                return None
            return qstate.quota_downscale(self.__state.quota_pools.default(pool_index), use_cores=use_cores)
        value, _, is_default = entry
        if is_default and not return_defaults:
            return None
        return qstate.quota_downscale(value, use_cores=use_cores)

    def _owners_quota(self, owner, pool, use_cores=False, return_defaults=True):
        """
        Build a `qtypes.QuotaItem` (real consumption, future consumption, quota) for an owner.

        :param owner: owner name
        :param pool: pool index (not a name), `None` for data stored without a pool
        :param use_cores: passed to `qstate.quota_downscale`
        :param return_defaults: if unset, a default quota is reported as `None`
        :return: `qtypes.QuotaItem`, all zeroes for an unknown owner
        """
        owner_index = self.__state.owners.index.get(owner)
        if owner_index is None:
            return qtypes.QuotaItem(0, 0, 0)

        real = future = 0
        consumption = self.__state.consumptions.get(owner_index, {}).get(pool)
        if consumption:
            real = qstate.quota_downscale(consumption.qp[0], use_cores=use_cores)
            future = qstate.quota_downscale(consumption.qp[1], use_cores=use_cores)

        limit = None
        entry = self.__state.quotas.get(owner_index, {}).get(pool)
        if entry is not None:
            raw_quota, _, is_default = entry
            if return_defaults or not is_default:
                limit = qstate.quota_downscale(raw_quota, use_cores=use_cores)
        return qtypes.QuotaItem(real, future, limit)

    @__rpc_method(allow_secondary=True)
    def owners_rating(self, owner=None, pool=None):
        """
        Build the owners rating of a pool, walking the stored rating in reverse order.

        :param owner: if given, the number of executing jobs is reported for this owner only
            (zero for the others)
        :param pool: pool name, `None` for the rating stored without a pool
        :return: list of `[owner name, qtypes.OwnersRatingItem]`
        """
        state = self.__state
        requested_owner = state.owners.index.get(owner)
        pool_index = self.__pool_index(pool)
        result = []
        for entry in reversed(state.owners_rating[pool_index]):
            rated_owner, _ = state.owners_rating_index[id(entry)]
            consumption = state.consumptions.get(rated_owner, {}).get(pool_index)
            remaining = entry[0]
            if consumption and (requested_owner is None or requested_owner == rated_owner):
                executing = len(consumption.executing_jobs)
            else:
                executing = 0
            if consumption:
                real = qstate.quota_downscale(consumption.qp[0])
                future = qstate.quota_downscale(consumption.qp[1])
            else:
                real = future = 0
            quota_entry = state.quotas[rated_owner][pool_index]
            item = qtypes.OwnersRatingItem(
                remnant_ratio=-remaining[0],
                remnant=qstate.quota_downscale(remaining[1]),
                real_consumption=real,
                future_consumption=future,
                quota=qstate.quota_downscale(quota_entry[0]),
                executing_jobs=executing,
                queue_size=state.queue_size_by_owners[rated_owner],
                is_default_quota=quota_entry[2],
            )
            result.append([state.owners[rated_owner], item])
        return result

    @__rpc_method(allow_secondary=True)
    def owners_rating_by_pools(self, owner=None):
        """
        Build owners ratings for all pools; the result is cached per owner for 60 seconds.

        Owners with an empty queue are skipped, except for the requested one.

        :param owner: if given, the number of executing jobs is reported for this owner only
            (zero for the others)
        :return: dictionary of pool name (`None` for the rating stored without a pool)
            to a list of `[owner name, qtypes.OwnersRatingItem]`
        """
        requested_owner = self.__state.owners.index.get(owner)
        cached = self.__owners_rating_by_pools_cache.setdefault(requested_owner, [0, None])
        if cached[0] > time.time():
            return cached[1]
        result = {}
        for pool_index, pool_rating in self.__state.owners_rating.items():
            # the state is looked up again on every pass: control is yielded between pools
            state = self.__state
            entries = []
            for entry in reversed(pool_rating):
                rated_owner, _ = state.owners_rating_index[id(entry)]
                waiting = state.queue_size_by_owners[rated_owner]
                if not waiting and requested_owner != rated_owner:
                    continue
                consumption = state.consumptions.get(rated_owner, {}).get(pool_index)
                remaining = entry[0]
                if consumption and (requested_owner is None or requested_owner == rated_owner):
                    executing = len(consumption.executing_jobs)
                else:
                    executing = 0
                if consumption:
                    real = qstate.quota_downscale(consumption.qp[0])
                    future = qstate.quota_downscale(consumption.qp[1])
                else:
                    real = future = 0
                quota_entry = state.quotas[rated_owner][pool_index]
                item = qtypes.OwnersRatingItem(
                    remnant_ratio=-remaining[0],
                    remnant=qstate.quota_downscale(remaining[1]),
                    real_consumption=real,
                    future_consumption=future,
                    quota=qstate.quota_downscale(quota_entry[0]),
                    executing_jobs=executing,
                    queue_size=waiting,
                    is_default_quota=quota_entry[2],
                )
                entries.append([state.owners[rated_owner], item])
            # let other greenlets run between pools
            gevent.sleep(0)
            if pool_index is not None:
                pool_name = self.__state.quota_pools.pools[pool_index]
            else:
                pool_name = None
            result[pool_name] = entries
        cached[:] = [time.time() + 60, result]
        return result

    @__rpc_method(allow_secondary=True)
    def multiple_owners_quota(self, owners=(), pool=None, use_cores=False, return_defaults=True):
        """
        Get quota items of several owners for a pool.

        :param owners: iterable of owner names
        :param pool: pool name, `None` for data stored without a pool
        :param use_cores: passed to `_owners_quota`
        :param return_defaults: passed to `_owners_quota`
        :return: list of `[owner name, quota item]`
        """
        pool_index = self.__pool_index(pool)
        return [
            [name, self._owners_quota(name, pool_index, use_cores=use_cores, return_defaults=return_defaults)]
            for name in owners
        ]

    @__rpc_method(allow_secondary=True)
    def multiple_owners_quota_by_pools(self, owners=(), use_cores=False, return_defaults=True):
        """
        Get quota items of several owners for every registered pool and for the `None` pool.

        :param owners: iterable of owner names
        :param use_cores: passed to `_owners_quota`
        :param return_defaults: passed to `_owners_quota`
        :return: dictionary of pool name to a list of `[owner name, quota item]`
        """
        named_pools = self.__state.quota_pools.pools.index.items()
        result = {}
        # the trailing pair stands for data stored without a pool
        for pool, pool_index in it.chain(named_pools, [(None, None)]):
            result[pool] = [
                [name, self._owners_quota(name, pool_index, use_cores=use_cores, return_defaults=return_defaults)]
                for name in owners
            ]
        return result

    @__rpc_method(method_type=jserver.RPC.full)
    def set_parent_owner(self, owner, parent, job=None):
        """
        Set the parent of an owner and wait for the change to be committed.

        :param owner: name of the child owner, must be known
        :param parent: name of the parent owner; a false value skips the checks of the parent
        :param job: RPC job, its logger is used if provided
        :raises ValueError: unknown owner or parent, the parent is a child itself
            or the owner is a parent already
        """
        log = self._logger if not job else job.log
        log.info("Setting parent for %s to %s", owner, parent)
        owner_index = self.__state.owners.index.get(owner)
        if owner_index is None:
            raise ValueError("Unknown owner '{}'".format(owner))
        parent_index = self.__state.owners.index.get(parent)
        if parent and parent_index is None:
            raise ValueError("Unknown parent '{}'".format(parent))
        if parent and parent_index in self.__state.owner_parents:
            raise ValueError("Parent '{}' is already child".format(parent))
        if owner_index in self.__state.owner_parents.viewvalues():
            raise ValueError("Owner '{}' is already parent".format(owner))
        db_operation = qtypes.DBOperations.SetParent(owner, parent)
        self.__state.push_db_operation_(self, db_operation, logger=log)
        self.__state.set_parent_owner(self, owner_index, parent_index)
        self._wait_commit(logger=log)

    @__rpc_method(allow_secondary=True)
    def parent_owners(self, owner=None):
        """
        Map owners to their parents.

        :param owner: owner name or several names; all owners having a parent are listed if omitted
        :return: dictionary of owner name to parent name
        :raises KeyError: a requested owner is unknown or has no parent
        """
        state = self.__state
        parents = {}
        if owner is None:
            for child_index in state.owner_parents:
                parents[state.owners[child_index]] = state.owners[state.owner_parents[child_index]]
        else:
            for name in common_itertools.chain(owner):
                child_index = state.owners.index[name]
                parents[name] = state.owners[state.owner_parents[child_index]]
        return parents

    @__rpc_method()
    def last_quota_remnants(self):
        """
        Get the last quota remnants with all values downscaled.

        :return: dictionary of tag to a list of remnants, each one being a list of downscaled values
        """
        downscaled = {}
        for tag, remnants in self.__local_state.last_quota_remnants.iteritems():
            downscaled[tag] = [
                [qstate.quota_downscale(value) for value in remnant]
                for remnant in remnants
            ]
        return downscaled

    @__rpc_method(allow_secondary=True)
    def operation_id(self, only_applied=False):
        """
        Get the last known operation id.

        :param only_applied: report the operation id of the state only, ignoring the journal
        :return: operation id of the state or, unless `only_applied` is set, the greater of the
            state's and the journal's ones
        """
        applied = self.__state.operation_id
        if only_applied:
            return applied
        journal = self.__journal
        journaled = journal.operation_id if journal else 0
        return max(applied, journaled)

    @__rpc_method()
    def replication_info(self):
        """Return the mapping of node address to the operation id the node reported as committed."""
        replicated = self.__replicated_operation_ids
        return replicated

    @__rpc_method(allow_secondary=True)
    def start_profiler(self):
        """(Re)start the yappi profiler with the greenlet context backend and the CPU clock."""
        # noinspection PyUnresolvedReferences
        import yappi
        already_running = yappi.is_running()
        if already_running:
            yappi.stop()
        # start from scratch: drop whatever was collected before
        yappi.clear_stats()
        yappi.set_context_backend("greenlet")
        yappi.set_clock_type("cpu")
        yappi.start(builtins=True)

    @__rpc_method(allow_secondary=True)
    def stop_profiler(self, profile_format="CALLGRIND"):
        """
        Stop the profiler and save collected function statistics to the runtime directory.

        :param profile_format: one of "YSTAT", "CALLGRIND", "PSTAT"
        :return: path to the profiler result, `None` if the profiler is not running
        """
        # noinspection PyUnresolvedReferences
        import yappi
        if not yappi.is_running():
            return None
        yappi.stop()
        stats = yappi.get_func_stats()
        timestamp = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
        basename = ".".join((profile_format.lower(), PROFILE_FILENAME, timestamp))
        filename = os.path.join(self._config.common.dirs.runtime, basename)
        stats.save(filename, type=profile_format)
        return filename

    @__rpc_method()
    def push_api_quota(self, delta):
        """
        Account API consumption reported since the last update of the API quotas table.

        :param delta: iterable of `(login, timestamp, consumption, web consumption)`
        :return: serialized banned list of the API consumption data
        """
        web_quota = self.__state.web_api_quota
        if web_quota is None:
            web_quota = qtypes.ComplexApiConsumption.WEB_DEFAULT_QUOTA

        for login, timestamp, api_delta, web_delta in delta:
            tracker = self.__local_state.complex_api_consumption
            # a record newer than the table's timestamp rotates the table first
            if timestamp > tracker.timestamp:
                tracker.rotate_table(timestamp)

            if api_delta:
                tracker.api_consumption.add_consumption(
                    login, timestamp, api_delta, self.__state.api_quotas.get(login)
                )
            if web_delta:
                tracker.web_api_consumption.add_consumption(login, timestamp, web_delta, web_quota)

        return self.__local_state.complex_api_consumption.serialized_banned_list

    @__rpc_method(method_type=jserver.RPC.full)
    def acquire_resource_lock(self, resource_id, host, job=None):
        """
        Ask the state to acquire a resource lock for a host, stamped with the current time.

        :param resource_id: id of the resource to lock
        :param host: host acquiring the lock
        :param job: RPC job, not used
        :return: whatever the state's `acquire_resource_lock` returns
        """
        now = int(time.time())
        return self.__state.acquire_resource_lock(self, resource_id, host, now)

    @__rpc_method(method_type=jserver.RPC.full)
    def release_resource_lock(self, resource_id, host, job=None):
        """
        Ask the state to release a resource lock held by a host.

        :param resource_id: id of the locked resource
        :param host: host releasing the lock
        :param job: RPC job, not used
        :return: whatever the state's `release_resource_lock` returns
        """
        outcome = self.__state.release_resource_lock(self, resource_id, host)
        return outcome

    @__rpc_method(allow_secondary=True)
    def semaphore_waiters(self, task_id):
        """
        Select tasks registered in `semaphore_blockers.tasks` of the state.

        :param task_id: task id or several ids
        :return: list of those ids that are present in `semaphore_blockers.tasks`
        """
        requested = set(common_itertools.chain(task_id))
        blocked = self.__state.semaphore_blockers.tasks & requested
        return list(blocked)

    @__rpc_method(method_type=jserver.RPC.generator, allow_secondary=True)
    def semaphore_wanting(self, sem_ids=None):
        """
        Yield `(semaphore id, list of task ids)` from `wants_semaphore` of the state.

        :param sem_ids: semaphore id or several ids to report; everything is reported if omitted,
            requested ids without an entry are skipped
        """
        if sem_ids is None:
            pairs = self.__state.wants_semaphore.items()
        else:
            pairs = (
                (wanted_id, self.__state.wants_semaphore.get(wanted_id))
                for wanted_id in common_itertools.chain(sem_ids)
            )
        for sem_id, tids in pairs:
            if tids is None:
                continue
            yield sem_id, list(tids)

    @__rpc_method(method_type=jserver.RPC.generator, allow_secondary=True)
    def semaphores(self, sem_ids=None):
        """
        Yield `(semaphore id, encoded semaphore)` from `semaphores` of the state.

        :param sem_ids: semaphore id or several ids to report; everything is reported if omitted,
            requested ids without an entry are skipped
        """
        if sem_ids is None:
            pairs = self.__state.semaphores.items()
        else:
            pairs = (
                (wanted_id, self.__state.semaphores.get(wanted_id))
                for wanted_id in common_itertools.chain(sem_ids)
            )
        for sem_id, sem in pairs:
            if sem is None:
                continue
            yield sem_id, sem.encode()

    @__rpc_method(method_type=jserver.RPC.full)
    def prequeue_push(self, task_id, job=None):
        """
        Push tasks to the prequeue; tasks listed in `executing_tids` of the state are skipped.

        :param task_id: task id or several ids
        :param job: RPC job, its logger is used if provided
        """
        log = self._logger if not job else job.log
        for tid in common_itertools.chain(task_id):
            if tid in self.__state.executing_tids:
                log.warning("Task #%s still executing", tid)
            elif self.__local_state.prequeue.push(tid, self.__prequeue_timeout, log):
                log.info("Task #%s pushed to prequeue", tid)

    @__rpc_method(method_type=jserver.RPC.full)
    def prequeue_pop(self, job=None):
        """
        Pop a task from the prequeue.

        :param job: RPC job, its logger is used if provided
        :return: whatever the prequeue's `pop` returns (logged as a task id when truthy)
        """
        log = self._logger if not job else job.log
        popped = self.__local_state.prequeue.pop(self.__prequeue_timeout, log)
        if not popped:
            return popped
        log.info("Task #%s popped from prequeue", popped)
        return popped

    @__rpc_method()
    def contenders(self):
        """Return a copy of the list of known contenders."""
        known = self.__contenders
        return list(known)

    @__rpc_method(allow_secondary=True)
    def get_runtime_option(self, name):
        """
        Get the value of a runtime option.

        :param name: one of "commit_quorum_size", "skipped_due_disk_space_limit", "logging_level",
            "gc_count", "gc_threshold"
        :return: current value of the option
        :raises ValueError: unknown option name
        """
        if name == "commit_quorum_size":
            return self.__commit_quorum_size
        if name == "skipped_due_disk_space_limit":
            return self.__skipped_due_disk_space_limit
        if name == "logging_level":
            return self.__logging_level
        # the remaining options are read directly from the garbage collector
        if name == "gc_count":
            return gc.get_count()
        if name == "gc_threshold":
            return gc.get_threshold()
        raise ValueError("Unknown runtime option: {}".format(name))

    @__rpc_method(method_type=jserver.RPC.full, allow_secondary=True)
    def set_runtime_option(self, name, value, job=None):
        """
        Set the value of a runtime option; the previous value is logged before the change.

        :param name: one of "commit_quorum_size", "skipped_due_disk_space_limit", "logging_level",
            "gc_threshold"
        :param value: new value; for "logging_level" `None` restores the configured level,
            for "gc_threshold" a sequence of arguments for `gc.set_threshold`
        :param job: RPC job, its logger is used if provided
        :raises ValueError: unknown option name
        """
        log = self._logger if not job else job.log
        # look up the previous value first, rejecting unknown options before anything is logged
        if name == "commit_quorum_size":
            previous = self.__commit_quorum_size
        elif name == "skipped_due_disk_space_limit":
            previous = self.__skipped_due_disk_space_limit
        elif name == "logging_level":
            previous = self.__logging_level
        elif name == "gc_threshold":
            previous = gc.get_threshold()
        else:
            raise ValueError("Unknown runtime option: {}".format(name))
        log.warning("Setting %s to %r, previous value: %r", name, value, previous)

        if name == "commit_quorum_size":
            self.__commit_quorum_size = value
        elif name == "skipped_due_disk_space_limit":
            self.__skipped_due_disk_space_limit = value
        elif name == "logging_level":
            self.__logging_level = value
            logging.root.setLevel(self._config.serviceq.log.level if value is None else value)
        else:
            gc.set_threshold(*value)

    @__rpc_method(method_type=jserver.RPC.dupgenerator)
    def lock_jobs(self, jobs_ids):
        """
        Keep the given jobs in the local state's `locked_jobs_ids` while the generator is running.

        Once the generator is started, the ids are added to `locked_jobs_ids`. Every truthy value sent into
        the generator is treated as id(s) to remove from the set early; a falsy value ends the loop.
        When the generator finishes for any reason, the ids originally passed in are removed from the set.

        :param jobs_ids: id(s) of jobs to lock, expanded with `common_itertools.chain`
        """
        self.__local_state.locked_jobs_ids.update(common_itertools.chain(jobs_ids))
        try:
            released = yield
            while released:
                self.__local_state.locked_jobs_ids.difference_update(common_itertools.chain(released))
                released = yield
        finally:
            # Runs on normal exit as well as on generator close/exception
            self.__local_state.locked_jobs_ids.difference_update(common_itertools.chain(jobs_ids))

    @__rpc_method()
    def get_unwanted_contenders(self):
        """
        Return the unwanted contenders stored in the state.

        :return: value of the state's `unwanted_contenders`
        """
        unwanted = self.__state.unwanted_contenders
        return unwanted

    @__rpc_method(method_type=jserver.RPC.full)
    def set_unwanted_contenders(self, unwanted_contenders, job=None):
        """
        Replace the unwanted contenders stored in the state.

        :param unwanted_contenders: new value, passed to the state's `set_unwanted_contenders` together with the server
        :param job: joint job object; its `log` is used when given, the server's own logger otherwise
        :return: value of the state's `unwanted_contenders` read after the update
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        logger.info("Setting unwanted contenders to %r", unwanted_contenders)
        self.__state.set_unwanted_contenders(self, unwanted_contenders)
        current = self.__state.unwanted_contenders
        return current

    @__rpc_method(method_type=jserver.RPC.full)
    def add_quota_pool(self, pool, tags, job=None):
        """
        Add a quota pool to the state.

        :param pool: pool name
        :param tags: client tags defining the pool
        :param job: joint job object; its `log` is used when given, the server's own logger otherwise
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        logger.info("Adding pool %s defined by tags %s", pool, tags)
        state = self.__state
        state.add_quota_pool(self, pool, tags)

    @__rpc_method(method_type=jserver.RPC.full)
    def update_quota_pool(self, pool, tags=None, default=None, job=None):
        """
        Update a quota pool in the state.

        Each of `tags` and `default` is logged only when it is not `None`; both are passed to the state's
        `update_quota_pool` regardless.

        :param pool: pool name
        :param tags: list of client tags defining pool
        :param default: default quota for the pool, in microQP
        :param job: joint job object; its `log` is used when given, the server's own logger otherwise
        """
        if job:
            logger = job.log
        else:
            logger = self._logger
        announcements = (
            ("Setting tags %s for pool %s", tags),
            ("Setting default quota to %s for pool %s", default),
        )
        for template, setting in announcements:
            if setting is not None:
                logger.info(template, setting, pool)
        self.__state.update_quota_pool(self, pool, tags, default)

    @__rpc_method(allow_secondary=True)
    def quota_pools(self):
        """
        Return the state's quota pools in encoded form.

        :return: result of calling `encode()` on the state's `quota_pools`
        """
        pools = self.__state.quota_pools
        return pools.encode()
