import ctypes
import base64
from dateutil.parser import parse
import errno
import fcntl
import functools
import grpc
import logging
import os
import random
import time
import threading
import json
import requests
import sys
import subprocess
import traceback
import uuid

from infra.diskmanager.proto import diskman_pb2
from infra.diskmanager.lib import consts
from infra.diskmanager.lib.disk import Disk, IOScheduler, read_sysfs_file, safe_read_sysfs_file, write_sysfs_file
from infra.diskmanager.lib.lvm import LVM
from infra.diskmanager.lib import logger
from infra.diskmanager.lib import limit
from infra.diskmanager.lib import mount
from infra.diskmanager.lib import kernel
from infra.diskmanager.lib import dirutil
import library.python.svn_version as sv

# ioctl request number; presumably the Linux FITRIM code from <linux/fs.h> -- TODO confirm
FITRIM = 0xc0185879
# Short alias for the gRPC INVALID_ARGUMENT status code
EINVAL = grpc.StatusCode.INVALID_ARGUMENT
# 1024 ** 3, used to express sizes in binary gigabytes
BYTES_IN_GB = 1024 * 1024 * 1024
# NOTE(review): by its name, a percentage of root filesystem space kept in reserve -- confirm against its users
ROOTFS_RESERVED_SPACE_PERC = 5

# Module-wide logger and the RPC logging helper built on top of it
log = logging.getLogger('diskmanager')
logrpc = logger.logrpc(logger=log)


def _ensure_rtc_place_tree():
    """Ensure the /place/db directory tree and its compatibility links.

    Steps, in order: /place/db (uid/gid 0, mode 0775), the /db -> /place/db
    link, the bsconfig directories under /db (uid/gid 1049, mode 0775), the
    /usr/local/www -> /place/db/www link and, only when /yt already exists as
    a directory, /yt itself (uid/gid 1049, mode 0755).

    Returns:
        None on success, otherwise a message describing the first step that
        failed; later steps are not attempted.
    """
    failure = dirutil.ensure_dir('/place/db', 0, 0, 0o775)
    if failure:
        return 'failed to ensure /place/db: {}'.format(failure)

    failure = dirutil.ensure_link('/db', '/place/db')
    if failure:
        return 'failed to ensure /db: {}'.format(failure)

    # These live behind the /db link created above.
    for path in ('/db/BASE',
                 '/db/bsconfig',
                 '/db/bsconfig/webcache',
                 '/db/www',
                 '/db/www/logs',
                 '/db/bin'):
        failure = dirutil.ensure_dir(path, 1049, 1049, 0o775)
        if failure:
            return 'failed to ensure {}: {}'.format(path, failure)

    failure = dirutil.ensure_link('/usr/local/www', '/place/db/www')
    if failure:
        return 'failed to ensure /usr/local/www: {}'.format(failure)

    # /yt is never created here; it is only adjusted when already present.
    if os.path.isdir('/yt'):
        failure = dirutil.ensure_dir('/yt', 1049, 1049, 0o755)
        if failure:
            return 'failed to ensure /yt: {}'.format(failure)
    return None


def _ensure_rtc_ssd_tree():
    """Ensure the directory layout under /ssd.

    Returns:
        None on success, otherwise a message describing the first step that
        failed.
    """
    # Ownership/mode of /ssd itself is enforced only when /ssd is not a symlink.
    if not os.path.islink('/ssd'):
        failure = dirutil.ensure_dir('/ssd', 1049, 1049, 0o775)
        if failure:
            return 'failed to ensure /ssd: {}'.format(failure)

    failure = dirutil.ensure_dir('/ssd/webcache', 1049, 1049, 0o775)
    if failure:
        return 'failed to ensure /ssd/webcache: {}'.format(failure)

    # The porto directories are only created when missing; an existing
    # directory is left untouched.
    porto_dirs = (
        ('/ssd/porto_layers', 'failed to create /ssd/porto_layers: {}'),
        ('/ssd/porto_volumes', 'failed to create /ssd/porto_volumes: {}'),
    )
    for path, template in porto_dirs:
        if os.path.isdir(path):
            continue
        _, failure = dirutil.wrap_error(os.mkdir, path, 0o755)
        if failure:
            return template.format(failure)
    return None


def _ensure_rtc_basesearch_tree(have_ssd=False):
    """Ensure the basesearch directory and point /basesearch at it.

    Args:
        have_ssd: when true the directory is /ssd/basesearch, otherwise
            /place/basesearch.

    Returns:
        None on success, otherwise a message describing the step that failed.
    """
    if have_ssd:
        target = '/ssd/basesearch'
        dir_failure_template = 'failed to ensure /ssd/basesearch: {}'
    else:
        target = '/place/basesearch'
        dir_failure_template = 'failed to ensure /place/basesearch: {}'

    failure = dirutil.ensure_dir(target, 1049, 1049, 0o775)
    if failure:
        return dir_failure_template.format(failure)

    failure = dirutil.ensure_link('/basesearch', target)
    if failure:
        return 'failed to ensure /basesearch: {}'.format(failure)
    return None


def require(*fields):
    """Build a decorator that rejects RPC requests lacking required fields.

    The decorated callable must have the signature
    ``(self, request, context)``. Before it runs, the names of the fields
    reported by ``request.ListFields()`` are compared with ``fields``; if any
    are absent, ``context.abort`` is called with INVALID_ARGUMENT and a message
    listing the missing names in sorted order, so the message is stable
    between calls.

    Args:
        *fields: names of the request fields that must be present.

    Returns:
        A decorator preserving the wrapped callable's metadata.
    """
    required = frozenset(fields)

    def func_wrapper(f):
        @functools.wraps(f)
        def checker(self, request, context):
            # ListFields() yields (descriptor, value) pairs; only the name matters here.
            present = {entry[0].name for entry in request.ListFields()}
            missing = required - present
            if missing:
                msg = 'Missing required fields: %s' % ', '.join(sorted(missing))
                context.abort(grpc.StatusCode.INVALID_ARGUMENT, msg)
            return f(self, request, context)

        return checker

    return func_wrapper


def synchronized(tlockname):
    """Decorator factory serialising a method on a per-instance lock.

    Args:
        tlockname: name of the instance attribute holding the lock object;
            it is looked up on ``self`` at every call and must provide
            ``acquire()`` and ``release()``.

    Returns:
        A decorator; the decorated method runs with the lock held and the
        lock is released even if the method raises.
    """

    def decorate(func):
        @functools.wraps(func)
        def locked_call(self, *args, **kwargs):
            guard = getattr(self, tlockname)
            guard.acquire()
            try:
                outcome = func(self, *args, **kwargs)
            finally:
                guard.release()
            return outcome

        return locked_call

    return decorate


def run(args, **kwargs):
    """Log a command at debug level and execute it.

    Args:
        args: command as a sequence of strings (joined with spaces for the log).
        **kwargs: passed through to ``subprocess.check_call``.

    Returns:
        Whatever ``subprocess.check_call`` returns.

    Raises:
        subprocess.CalledProcessError: propagated from ``check_call``.
    """
    command_line = " ".join(args)
    log.debug("run cmd: '{}'".format(command_line))
    return subprocess.check_call(args, **kwargs)


class FSTrimStat(object):
    """Accumulator of fstrim counters (bytes, scanned bytes, loops, errors) for one disk."""

    def __init__(self, disk_info):
        # disk_info is only used to look up the disk name (see get_disk_name).
        self._disk_info = disk_info
        self._bytes = self._scan_bytes = 0
        self._errors = self._loops = 0

    def inc_errors(self):
        """Count one more error."""
        self._errors = self._errors + 1

    def inc_bytes(self, trim_bytes, scan_bytes):
        """Add to the trimmed and scanned byte counters."""
        self._scan_bytes = self._scan_bytes + scan_bytes
        self._bytes = self._bytes + trim_bytes

    def inc_loops(self):
        """Count one more loop."""
        self._loops = self._loops + 1

    def fill_stat_response(self, resp):
        """Add this object's counters onto the ``fstrim_*`` fields of ``resp``."""
        resp.fstrim_bytes += self._bytes
        resp.fstrim_scan_bytes += self._scan_bytes
        resp.fstrim_loops += self._loops
        resp.fstrim_errors += self._errors

    def get_disk_name(self):
        """Return ``disk.name`` of the disk info given to the constructor."""
        return self._disk_info.disk.name

    def get_yasm_values(self):
        """Return the counters as a list of ``{'name': ..., 'val': ...}`` dicts."""
        signals = (
            ('fstrim_bytes_tmmm', self._bytes),
            ('fstrim_scan_bytes_tmmm', self._scan_bytes),
            ('fstrim_loops_tmmm', self._loops),
            ('fstrim_errors_tmmm', self._errors),
        )
        return [{'name': signal, 'val': amount} for signal, amount in signals]


class State(object):
    """A boolean condition with a description, a code and the time it was last set."""

    # Attributes compared by cmp_state, in the order that defines its result.
    _CMP_ATTRS = ('_val', '_desc', '_code')

    def __init__(self, val, desc='', code=0):
        self._val = self._desc = self._code = self._ts = None
        self.set(val, desc, code)

    def set(self, val, desc='', code=0):
        """Store a new value/description/code and stamp the current time."""
        self._val, self._desc, self._code = val, desc, code
        self._ts = time.time()

    def cmp_state(self, b):
        """Compare with another state, ignoring timestamps.

        Returns:
            1, 2 or 3 when the value, description or code (checked in that
            order) differs; None when all three are equal.
        """
        for rank, attr in enumerate(self._CMP_ATTRS, 1):
            if not getattr(self, attr) == getattr(b, attr):
                return rank
        return None

    def copy_ts(self, src):
        """Take over ``src``'s timestamp, but only if ``src`` compares equal."""
        if self.cmp_state(src):
            return
        self._ts = src._ts

    def get_msg(self):
        """Return the description and code formatted as one message."""
        return '%s, code=%d' % (self._desc, self._code)

    def fill_pb_info(self, c):
        """Fill condition message ``c``: transition time, status and, for a false state, the message."""
        c.last_transition_time.FromSeconds(int(self._ts))
        if not self._val:
            c.status = 'False'
            c.message = self.get_msg()
            return
        c.status = 'True'


def get_iosched_policy(kernel_release, default_io_scheduler):
    """Select the per-storage-class I/O scheduler policy for a kernel.

    Args:
        kernel_release: indexable kernel version; elements 0..3 are compared
            with integers as major, minor, patch and build.
        default_io_scheduler: policy key ("cfq", "bfq" or "mq-deadline") used
            when no kernel-specific override applies.

    Returns:
        A dict keyed by storage class. The HDD entry is an IOScheduler; the
        SSD and NVMe entries are dicts of named IOScheduler alternatives.

    Raises:
        KeyError: if ``default_io_scheduler`` is not a known policy key.
    """
    iosched_policies = {
        "cfq": {consts.STORAGE_HDD: IOScheduler('cfq', {'group_idle': '8'}),
                consts.STORAGE_SSD: {'default': IOScheduler('cfq', {'group_idle': '0'}),
                                     'kyber': IOScheduler(*consts.KYBER_OPTIONS)},
                consts.STORAGE_NVME: {'none': IOScheduler('none'),
                                      'kyber': IOScheduler(*consts.KYBER_OPTIONS)}
                },

        "bfq": {consts.STORAGE_HDD: IOScheduler('bfq'),
                consts.STORAGE_SSD: {'default': IOScheduler('bfq'),
                                     'kyber': IOScheduler(*consts.KYBER_OPTIONS)},
                consts.STORAGE_NVME: {'none': IOScheduler('none'),
                                      'kyber': IOScheduler(*consts.KYBER_OPTIONS)}
                },

        "mq-deadline": {consts.STORAGE_HDD: IOScheduler('mq-deadline'),
                        consts.STORAGE_SSD: {'default': IOScheduler('mq-deadline'),
                                             'kyber': IOScheduler(*consts.KYBER_OPTIONS)},
                        consts.STORAGE_NVME: {'none': IOScheduler('none'),
                                              'kyber': IOScheduler(*consts.KYBER_OPTIONS)}
                        }
    }

    major, minor = kernel_release[0], kernel_release[1]

    # 4.x kernels older than 4.19 always get cfq.
    if major == 4 and minor < 19:
        return iosched_policies["cfq"]

    # Workaround for https://st.yandex-team.ru/KERNEL-420, disable bfq kernel before 4.19.131-33.
    # (patch, build) is compared lexicographically: testing each component
    # separately would miss e.g. 4.19.100-50, which is older than 4.19.131-33.
    if major == 4 and minor == 19 and (kernel_release[2], kernel_release[3]) < (131, 33):
        return iosched_policies["mq-deadline"]

    return iosched_policies[default_io_scheduler]


def get_mpolicy(kernel_release):
    """Build the table of mount policies, keyed by filesystem then policy name.

    Only 'ext4' is defined. Its policies are DEFAULT, SAFE_LAZYTIME,
    SAFE_NO_LAZYTIME and UNSAFE, plus three aliases that share the same
    MountConfig objects: SAFE (= SAFE_LAZYTIME), ROOTFS (= DEFAULT) and YT
    (SAFE_NO_LAZYTIME or SAFE_LAZYTIME depending on the kernel).

    Args:
        kernel_release: indexable kernel version; elements 0..3 are compared
            with integers as major, minor, patch and build.

    Returns:
        dict of the form ``{'ext4': {policy_name: mount.MountConfig}}``.
    """

    def journaled_mkfs_opts():
        # A fresh dict per config: 'root_owner' is added to it later, so the
        # configs must not share one instance.
        return {'opts': ['has_journal', 'quota'],
                'ext_opts': ['quotatype=prjquota', 'nodiscard']}

    ext4 = {
        'DEFAULT': mount.MountConfig(
            ['data=ordered', 'lazytime'],
            [mount.MountOptBin('barrier', 'nobarrier', True, True),
             mount.MountOptBin('lazytime', 'nolazytime', False, True),
             mount.MountOptBin('discard', 'nodiscard', False, False)],
            ['ro', 'abort'],
            project_quota=True,
            reserved_space=5,
            mkfs_opts=journaled_mkfs_opts()),
        'SAFE_LAZYTIME': mount.MountConfig(
            ['data=ordered', 'lazytime', 'nodev', 'nosuid'],
            [mount.MountOptBin('barrier', 'nobarrier', True, True),
             mount.MountOptBin('lazytime', 'nolazytime', False, True),
             mount.MountOptBin('discard', 'nodiscard', False, False),
             mount.MountOptBin('nodev', 'dev', False, True),
             mount.MountOptBin('nosuid', 'suid', False, True)],
            ['ro', 'abort'],
            project_quota=True,
            reserved_space=0,
            mkfs_opts=journaled_mkfs_opts()),
        'SAFE_NO_LAZYTIME': mount.MountConfig(
            ['data=ordered', 'nolazytime', 'nodev', 'nosuid'],
            [mount.MountOptBin('barrier', 'nobarrier', True, True),
             mount.MountOptBin('lazytime', 'nolazytime', False, False),
             mount.MountOptBin('discard', 'nodiscard', False, False),
             mount.MountOptBin('nodev', 'dev', False, True),
             mount.MountOptBin('nosuid', 'suid', False, True)],
            ['ro', 'abort'],
            project_quota=True,
            reserved_space=0,
            mkfs_opts=journaled_mkfs_opts()),
        'UNSAFE': mount.MountConfig(
            ['nobarrier', 'noload', 'noquota'],
            [mount.MountOptBin('discard', 'nodiscard', False, False)],
            ['ro', 'abort'],
            project_quota=False,
            reserved_space=0,
            # 'root_owner' is added later here as well
            mkfs_opts={'opts': ['^has_journal', '^quota'],
                       'ext_opts': ['nodiscard']}),
    }
    ext4['SAFE'] = ext4['SAFE_LAZYTIME']
    ext4['ROOTFS'] = ext4['DEFAULT']

    # Disable lazytime for yt disks on kernels ( v4.4, 4.19 <4.19.100-23) https://st.yandex-team.ru/DISKMAN-78
    # (patch, build) is compared lexicographically: testing each component
    # separately would miss e.g. 4.19.91-25, which is older than 4.19.100-23.
    lazytime_broken = kernel_release[0] == 4 and (
        kernel_release[1] < 19
        or (kernel_release[1] == 19 and (kernel_release[2], kernel_release[3]) < (100, 23))
    )
    if lazytime_broken:
        log.info("DISKMAN-78: disable lazytime for YT disks on kernel: %s", str(kernel_release))
        ext4['YT'] = ext4['SAFE_NO_LAZYTIME']
    else:
        log.info("DISKMAN-78: enable lazytime for YT disks on kernel: %s", str(kernel_release))
        ext4['YT'] = ext4['SAFE_LAZYTIME']

    return {'ext4': ext4}

# Kernel version as returned by kernel.kernel_version(); evaluated once, at import time
_kernel_release = kernel.kernel_version()
# Mount policy table built for that kernel version (see get_mpolicy)
MPOLICY = get_mpolicy(_kernel_release)


class DiskInfo(object):
    """State kept by the disk manager for one block device.

    Wraps a ``Disk`` and tracks the volume group found on it, the
    ready/configured/error/absent conditions, the fstrim iteration progress
    and the background fstrim task, and the I/O limit handle of the device.
    """

    # Mount policy expected for each known system mount point.
    SYSPART_LIST = {
        '/': MPOLICY['ext4']['DEFAULT'],
        '/home': MPOLICY['ext4']['DEFAULT'],
        '/place': MPOLICY['ext4']['SAFE'],
        '/ssd': MPOLICY['ext4']['SAFE'],
    }
    # /yt/disk0..8, /yt/nvme0..8 and /yt/ssd0..8 all use the YT policy.
    SYSPART_LIST.update(
        ('/yt/{}{}'.format(kind, idx), MPOLICY['ext4']['YT'])
        for kind in ('disk', 'nvme', 'ssd')
        for idx in range(9)
    )

    def __init__(self, name, dm):
        """Create the info object for device ``name`` owned by disk manager ``dm``."""
        self.disk = Disk(name)
        self.dm = dm
        self._vg_name = ''
        self._pv_uuid = ''
        self._vg_uuid = ''
        self._ts = time.time()
        self._fstrim_policy = diskman_pb2.DiskSpec.AUTO
        self._fstrim_max_len = consts.DEFAULT_FSTRIM_MAX_CHUNK
        self._fstrim_min_len = consts.DEFAULT_FSTRIM_MIN_CHUNK
        self._fstrim_wait_factor = consts.DEFAULT_FSTRIM_WAIT_FACTOR
        self._fstrim_task = None
        self._fstrim_last_vol = None
        self._fstrim_offset = 0
        # Back-dated by one fstrim period.
        self._fstrim_ts = time.time() - dm._fstrim_period
        self._fstrim_end_ts = time.time()
        self._fstrim_last_part = 0
        self._custom_labels = {}
        # By default DiskInfo is initialized in the not-ready, unconfigured state.
        # It will be marked as configured once a valid VGroupInfo is found.
        self._s_ready = State(False, 'Disk not configured yet')
        self._s_configured = State(False, 'No valid vg found')
        self._s_error = State(False)
        self._s_absent = State(False)
        self._vg_size = 0
        self._vpool_capacity_bytes = 0
        self._allocatable_bytes = 0
        self._extent_size = 1
        # major/minor are strings here (they are converted with int() in fill_pb_info).
        self._iolimit = limit.IOLimitInfo(self.disk.major + ":" + self.disk.minor)
        self._yp_io_limits = self.dm.yp_io_limits.get(self.disk)

        if not self.disk.uuid:
            self._s_configured.set(False, 'Partition table not found')
            self._s_ready.set(False, 'Disk not configured yet')

    def set_fstrim_limit(self, max_len, min_len, wait_factor):
        """Override the fstrim chunk limits and wait factor."""
        self._fstrim_max_len = max_len
        self._fstrim_min_len = min_len
        self._fstrim_wait_factor = wait_factor

    def long_name(self):
        """Return ``'<device_path>(<disk id>)'``."""
        return '%s(%s)' % (self.disk.device_path, self.disk.id)

    def _update_status(self):
        """Recompute the ready condition from absent, error and configured.

        Absent takes precedence over error, which takes precedence over
        not-configured. For absent and error the background threads are
        stopped and joined.
        """
        if self._s_absent._val:
            self._s_ready.set(False, 'Disk absent')
            self.stop_bg_threads(wait=True)
            return
        if self._s_error._val:
            self._s_ready.set(False, 'Disk error')
            self.stop_bg_threads(wait=True)
            return
        if not self._s_configured._val:
            self._s_ready.set(False, 'Disk not configured yet')
            return
        self._s_ready.set(True)

    def may_import_as_new(self):
        """Return True only for a present, error-free, unconfigured disk that
        has no uuid, no partitions and is at least 2 * BYTES_IN_GB in size."""
        if self._s_absent._val:
            return False
        if self._s_error._val:
            return False
        if self._s_configured._val:
            return False
        if self.disk.uuid:
            return False
        if self._all_parts():
            return False
        if self.disk.size < BYTES_IN_GB * 2:
            return False
        return True

    def config_fstrim(self):
        """Start the background fstrim job unless it is disabled or the disk is unusable.

        Returns:
            False if the policy is DISABLED or the disk is absent/in error;
            True otherwise (the job is created and started if not running yet).
        """
        if self._fstrim_policy == diskman_pb2.DiskSpec.FSTrimPolicy.Value('DISABLED'):
            return False
        if self._s_absent._val or self._s_error._val:
            return False
        if self._fstrim_task is None:
            self._fstrim_task = FSTrimJob(self.dm, self.disk.id)
            self._fstrim_task.start()
        return True

    def wakeup_fstrim(self):
        """Wake the fstrim job up, if there is one."""
        if self._fstrim_task is not None:
            self._fstrim_task.wakeup()

    def stop_bg_threads(self, wait):
        """Ask the fstrim job to stop; join it as well when ``wait`` is true."""
        if self._fstrim_task is not None:
            self._fstrim_task.notify_stop()
            if wait:
                self._fstrim_task.join()

    def transfer_state(self, src):
        """Inherit state from ``src``, another DiskInfo.

        Condition timestamps are copied only where the condition itself is
        unchanged (see State.copy_ts); fstrim progress is copied as is; the
        fstrim task is moved over, leaving ``src`` without one.
        """
        self._s_ready.copy_ts(src._s_ready)
        self._s_configured.copy_ts(src._s_configured)
        self._s_error.copy_ts(src._s_error)
        self._s_absent.copy_ts(src._s_absent)
        self._fstrim_offset = src._fstrim_offset
        self._fstrim_ts = src._fstrim_ts
        self._fstrim_end_ts = src._fstrim_end_ts
        self._fstrim_last_part = src._fstrim_last_part

        self._fstrim_task = src._fstrim_task
        src._fstrim_task = None

    def add_vg(self, name, uuid, pv_uuid, vg_size, allocatable_bytes, extent_size, discard_type, custom_labels, vg_foreign):
        """Attach a volume group to this disk and refresh the disk status.

        A second VG with a different name marks the disk as not configured and
        is otherwise ignored. A foreign VG is recorded but also leaves the
        disk not configured.

        Args:
            name, uuid, pv_uuid: identifiers of the VG and its physical volume.
            vg_size, allocatable_bytes, extent_size: VG sizes to record.
            discard_type: name of a DiskSpec.FSTrimPolicy value; applied only
                when the disk supports discard, with AUTO as the fallback for
                a name that cannot be resolved.
            custom_labels: labels reported for the disk in fill_pb_info.
            vg_foreign: true when the VG is not managed by diskman.
        """
        if self._vg_name and (not self._vg_name == name):
            self._s_configured = State(False, 'Multiple VG for disk is not supported')
            self._update_status()
            log.error('Found new VG:%s, but disk %s already has' %
                      (name, self._vg_name))
            return
        # Valid config found
        self._vg_name = name
        self._vg_uuid = uuid
        self._pv_uuid = pv_uuid
        self._custom_labels = custom_labels

        # The requested fstrim policy belongs to the accepted VG. Previously it
        # was parsed only in the conflicting-VG branch above, so it never took
        # effect for a valid configuration.
        if self.disk.discard_support:
            try:
                self._fstrim_policy = diskman_pb2.DiskSpec.FSTrimPolicy.Value(discard_type)
            except Exception:
                self._fstrim_policy = diskman_pb2.DiskSpec.FSTrimPolicy.Value('AUTO')

        if vg_foreign:
            self._s_configured = State(False, 'VG for this disk is foreign')
        else:
            self._s_configured.set(True)

        self._allocatable_bytes = allocatable_bytes
        self._vg_size = vg_size
        self._vpool_capacity_bytes = vg_size
        self._extent_size = extent_size
        self._update_status()

    def fill_pb_info(self, di):
        """Fill the protobuf disk message ``di`` (meta, spec, partitions, status)."""
        di.meta.id = self.disk.id
        # Fill Disk.spec
        di.spec.storage_class = self.disk.storage_class
        di.spec.device_path = self.disk.device_path
        di.spec.sysfs_path = self.disk.sysfs_path
        di.spec.capacity_bytes = self.disk.size
        di.spec.vpool_capacity_bytes = self._vpool_capacity_bytes
        di.spec.serial = self.disk.serial
        di.spec.model = self.disk.model
        di.spec.fstrim = self._fstrim_policy
        # Not populated:
        # di.spec.physical_id = self.msg.spec.physical_id
        # di.spec.sw_version = self.msg.spec.sw_version
        di.spec.layout_id = self.disk.uuid
        di.spec.labels.update(self._custom_labels)
        di.spec.extent_size_bytes = self._extent_size
        di.spec.scheduler = self.disk.scheduler.name
        di.spec.major = int(self.disk.major)
        di.spec.minor = int(self.disk.minor)
        if self._yp_io_limits is not None:
            di.spec.io_limits.CopyFrom(self._yp_io_limits)
        # Partition info
        for p in self.disk.parts:
            rp = di.spec.partitions.add()
            rp.start_bytes = p.start
            rp.size_bytes = p.size
            rp.type = p.type
            rp.uuid = p.uuid
            rp.fs_label = p.fs_label
            rp.fs_uuid = p.fs_uuid
            # The partition backing the VG is the one whose fs_uuid equals the PV uuid.
            rp.volume_source = bool(self._pv_uuid and self._pv_uuid == p.fs_uuid)
        # Fill Disk.status
        di.status.allocatable_bytes = self._allocatable_bytes
        self._s_ready.fill_pb_info(di.status.ready)
        self._s_configured.fill_pb_info(di.status.configured)
        self._s_error.fill_pb_info(di.status.error)
        self._s_absent.fill_pb_info(di.status.absent)
        di.status.last_scan.FromSeconds(int(self._ts))
        di.status.last_fstrim.FromSeconds(int(self._fstrim_ts))

    def _all_parts(self):
        """Return ``[disk.virt_part]`` when the disk has one, else ``disk.parts``."""
        if self.disk.virt_part:
            return [self.disk.virt_part]
        else:
            return self.disk.parts

    def next_fstrim(self, ts):
        """Pick the next partition to trim.

        Returns:
            ``(partition, self._fstrim_ts)`` for the first partition, starting
            at the saved index, that is mounted with a supported filesystem;
            ``(None, ts)`` when the iteration is at its start and
            ``self._fstrim_ts`` is later than ``ts``, or when no partition
            qualifies.
        """
        if (self._fstrim_last_part == 0) and (self._fstrim_offset == 0) and (self._fstrim_ts > ts):
            return None, ts

        p_list = self._all_parts()
        for p_idx in range(self._fstrim_last_part, len(p_list)):
            p = p_list[p_idx]
            if p.mnt_path and p.fs_type in VolumeInfo.FSTRIM_SUPPORTED_FS:
                self._fstrim_last_part = p_idx
                return (p, self._fstrim_ts)
            # A skipped partition invalidates the saved offset.
            self._fstrim_offset = 0
        return None, ts

    # Returns True once one filesystem has been completed
    def fstrim_iter_advance(self, count, update_ts=False):
        """Advance the fstrim position by ``count`` within the current partition.

        When the offset exceeds the partition size, the offset is reset and
        the iteration moves to the next partition; wrapping past the last
        partition forces a timestamp update.

        Args:
            count: amount to add to the current offset.
            update_ts: force the fstrim timestamps to be rotated.

        Returns:
            True if the current partition was completed by this call.
        """
        done = False
        p_list = self._all_parts()
        self._fstrim_offset = self._fstrim_offset + count
        if self._fstrim_offset > p_list[self._fstrim_last_part].size:
            self._fstrim_offset = 0
            done = True
            self._fstrim_last_part = self._fstrim_last_part + 1
            if self._fstrim_last_part >= len(p_list):
                self._fstrim_last_part = 0
                update_ts = True
        if update_ts:
            self._fstrim_ts = self._fstrim_end_ts
            self._fstrim_end_ts = time.time()
        return done

    def check_ioscheduler(self):
        """Switch the disk to the scheduler its storage class calls for.

        Nothing is done for storage classes without a policy entry or for
        devices whose name starts with 'md'. NVMe disks get 'kyber' when the
        serial contains one of the enabled kyber models (the YT list is used if
        any partition is mounted under /yt/), otherwise 'none'. SSDs get
        'kyber' or 'default' depending on ``dm.enabled_kyber_for_ssd``.
        """
        tgt = self.dm.iosched_policy.get(self.disk.storage_class)
        if tgt is None or self.disk.name.startswith('md'):
            return
        if self.disk.storage_class == consts.STORAGE_NVME:
            on_yt = any(p.mnt_path and p.mnt_path.startswith('/yt/') for p in self.disk.parts)
            if on_yt:
                kyber_models = self.dm.enabled_kyber_models_yt
            else:
                kyber_models = self.dm.enabled_kyber_models
            use_kyber = any(model in self.disk.serial for model in kyber_models)
            tgt = tgt.get('kyber' if use_kyber else 'none')
        elif self.disk.storage_class == consts.STORAGE_SSD:
            tgt = tgt.get('kyber' if self.dm.enabled_kyber_for_ssd else 'default')

        if not self.disk.scheduler.equal(tgt):
            self.disk.set_scheduler(tgt)

    # DISKMAN-122: set io limits on SAMSUNG MZWLL3T2HMJP-00003 and MZWLR7T6HALA-00007 disks
    # HOSTMAN-1345: remove these limits for disks with kyber scheduler
    def check_samsung_io_limits(self):
        """Apply, or restore to defaults, the write-bps and max_sectors_kb limits.

        Only acts on disks whose serial starts with an entry of
        ``consts.SAMSUNG_BAD_DISKS``. The default values are used instead of
        the restricted ones when a partition is mounted under /yt/ or the disk
        runs the kyber scheduler.
        """
        for bad_disk_name in consts.SAMSUNG_BAD_DISKS:
            if not self.disk.serial.startswith(bad_disk_name):
                continue
            target_write_bps = consts.SAMSUNG_IOLIM_WRITE_BPS
            target_max_sectors_kb = consts.SAMSUNG_MAX_SECTORS_KB
            for p in self._all_parts():
                if (p.mnt_path and p.mnt_path.startswith('/yt/')) or self.disk.scheduler.equal(IOScheduler(*consts.KYBER_OPTIONS)):
                    # restore default values for YT-disks and kyber scheduler
                    target_write_bps = consts.SAMSUNG_DEFAULT_IOLIM_WRITE_BPS
                    target_max_sectors_kb = consts.SAMSUNG_DEFAULT_MAX_SECTORS_KB
                    break

            if self._iolimit._write_bps != target_write_bps:
                log.info("DISKMAN-122: set write bps limit on %s disk: %s, to %d bps", bad_disk_name, self.disk.device_path, target_write_bps)
                self._iolimit.set_write_bps(target_write_bps)

            # The sysfs value is compared as a string; an empty/failed read skips the update.
            max_sectors_kb = safe_read_sysfs_file(self.disk.sysfs_path, 'queue/max_sectors_kb')
            if max_sectors_kb and max_sectors_kb != str(target_max_sectors_kb):
                log.info("DISKMAN-122: set max_sectors_kb on %s disk: %s, to %d", bad_disk_name, self.disk.device_path, target_max_sectors_kb)
                write_sysfs_file(str(target_max_sectors_kb), self.disk.sysfs_path, 'queue/max_sectors_kb')

    def check_mount_state(self):
        """Verify mount options of known system partitions and remount if needed.

        Only mounted partitions with a supported filesystem whose mount point
        is listed in SYSPART_LIST are checked.

        Returns:
            False if the disk is already in error, an unhealthy mount is found
            (the error condition is then set) or a remount fails; True
            otherwise.
        """
        if self._s_error._val:
            return False
        for p in self._all_parts():
            if not p.mnt_path:
                continue
            if p.fs_type not in VolumeInfo.FSTRIM_SUPPORTED_FS:
                continue
            if p.mnt_path not in DiskInfo.SYSPART_LIST:
                continue
            mp = DiskInfo.SYSPART_LIST[p.mnt_path]
            m_inf = mount.find_by_path(p.mnt_path)
            if not mp.is_healthy(m_inf.opts):
                self._s_error.set(True, 'FS error on mnt:%s partition %s ' % (p.mnt_path, p.name))
                return False

            if not mp.need_remount(m_inf.opts):
                continue

            new_opt_list = mp.make_opt(m_inf.opts)
            new_opts = ','.join(new_opt_list)
            log.info('Remount %s old_opts:%s, new_opts:%s' % (p.mnt_path, m_inf.opts, new_opts))
            try:
                # The attempt is counted before it is made, so failures are included.
                self.dm._stat_remounts += 1
                Disk.remount_fs(m_inf.path, new_opts)
            except OSError as e:
                log.error("remount failed with:%s" % str(e))
                # NOTE(review): the error condition is set to False here, unlike the
                # FS-error case above; only the description changes. Confirm intent.
                self._s_error.set(False, 'fail to remount dev:{}, at:{}, from:{} to:{}'.format(p.name, p.mnt_path, m_inf.opts, new_opts))
                self.dm._stat_remount_errors += 1
                return False
        return True


class VGroupInfo(object):
    """One LVM volume group, bound at construction to the disk that hosts it.

    The constructor parses the VG report entry, resolves the device behind the
    physical volume and, when that disk is known to ``hw_info``, registers the
    VG on it via ``DiskInfo.add_vg``. ``_owner`` stays False and ``_disk_id``
    empty when the disk cannot be resolved.
    """

    # Prefix of the VG tags that carry user-defined disk labels.
    LABEL_TAG_PREFIX = 'diskman.label_'

    def __init__(self, vg_info, hw_info):
        """
        Args:
            vg_info: mapping describing the VG; reads 'vg_name', 'pv_uuid',
                'pv_name', 'vg_extent_size', 'vg_tags' and optionally
                'vg_free' and 'vg_size'.
            hw_info: object providing ``lookup_disk_by_name``.
        """
        self._disk_id = ''
        self._owner = False
        self._name = vg_info['vg_name']
        # NOTE(review): the uuid is taken from 'vg_name', not from a uuid field --
        # confirm whether this is intentional before changing it.
        self._uuid = vg_info['vg_name']
        self._pv_uuid = vg_info['pv_uuid']
        self._pv_name = vg_info['pv_name']
        self._allocatable_bytes = LVM.get_capacity(vg_info.get('vg_free', '0'))
        self._vg_size = LVM.get_capacity(vg_info.get('vg_size', '0'))
        self._extent_size = LVM.get_capacity(vg_info['vg_extent_size'])

        tags = vg_info['vg_tags']

        # A VG without the tag diskman=true is treated as foreign.
        self._foreign = False
        if not tags.get('diskman', '') == 'true':
            log.debug("Foreign vg: %s found" % self._name)
            self._foreign = True

        disk_name = Disk.find_device(self._pv_name)
        if not disk_name:
            # Report the PV that could not be resolved, not the VG name.
            log.info('Cant find disk for pv:%s' % self._pv_name)
            return

        di = hw_info.lookup_disk_by_name(disk_name)
        if not di:
            log.info('Cant find disk %s in cache' % disk_name)
            return

        # Tags 'diskman.label_<key>' become custom labels '<key>'.
        prefix = self.LABEL_TAG_PREFIX
        custom_labels = {}
        for k, v in tags.items():
            if k.startswith(prefix):
                custom_labels[k[len(prefix):]] = v

        assert di.disk.id
        self._disk_id = di.disk.id
        self._owner = True
        discard_type = tags.get('diskman.fstrim', 'AUTO')
        di.add_vg(self._name, self._uuid, self._pv_uuid, self._vg_size, self._allocatable_bytes, self._extent_size, discard_type, custom_labels, self._foreign)


class VolumeException(Exception):
    """Base class of the volume-related exceptions in this module."""


class VolumeExceptionBusy(VolumeException):
    """VolumeException subtype; by its name, signals a busy volume (raise sites are outside this view)."""


class VolumeInfo(object):
    """Cached state of a single LVM logical volume tracked by diskman."""
    # Filesystem types for which may_fstrim() allows trimming.
    FSTRIM_SUPPORTED_FS = ['ext4', 'xfs']
    # TODO: There is probably a better way to make constants with short names
    # Short aliases for the numeric values of the protobuf MountPolicy enum.
    MP_NONE = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('NONE')
    MP_DEFAULT = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('DEFAULT')
    MP_SAFE = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('SAFE')
    MP_UNSAFE = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('UNSAFE')
    MP_ROOTFS = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('ROOTFS')
    MP_CUSTOM = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value('CUSTOM')

    def __init__(self, lv_info, hw_info, dm):
        """Build the volume description from one LVM report entry.

        :param lv_info: mapping describing the logical volume; 'lv_tags' is
            required, the other fields are read with ``get``.
        :param hw_info: hardware cache; supplies the mount table and is used
            by ``_check_config`` to find the parent VG and disk.
        :param dm: owning manager; supplies ``_fstrim_period`` and the
            remount counters.
        """
        self.dm = dm
        self._hw_info = hw_info
        self._lv_info = lv_info
        self._disk_id = ''
        self._ts = time.time()

        # Identity and geometry as reported by LVM.
        self._name = lv_info.get('lv_name')
        self._uuid = lv_info.get('lv_uuid')
        self._device_path = lv_info.get('lv_path')
        self._vg_name = lv_info.get('vg_name')
        self._size = LVM.get_capacity(lv_info.get('lv_size', '0'))
        self._is_open = lv_info.get('lv_device_open')
        self._major = lv_info.get('lv_kernel_major')
        self._minor = lv_info.get('lv_kernel_minor')

        # Creation time: the LVM string is parsed and converted with mktime().
        created = parse(lv_info.get('lv_time', 'Thu, 01 Jan 1970 00:00:00 +0000'))
        self._creat_ts = time.mktime(created.timetuple())

        self._udev_info = Disk.get_udev_info(self._major, self._minor)

        # Status flags, each created as State(False).
        self._s_ready = State(False)
        self._s_configured = State(False)
        self._s_manageable = State(False)
        self._s_error = State(False)
        self._s_absent = State(False)

        # fstrim bookkeeping; the last-trim timestamp is backdated by one period.
        self._fstrim_policy = True
        self._fstrim_offset = 0
        self._fstrim_ts = time.time() - dm._fstrim_period
        self._fstrim_end_ts = time.time()

        self._mnt_path = mount.find_mount_path(self._major, self._minor, hw_info._mount_info)
        self._found_fs_type = self._udev_info.get('ID_FS_TYPE', '').lower()
        self._iolimit = limit.IOLimitInfo(self._major + ":" + self._minor)

        # Defaults that _check_config() may override from the LV tags.
        self._access_type = 'block'
        self._mount_policy = VolumeInfo.MP_CUSTOM
        self._is_static = False

        # Tags named 'diskman.label_<key>' carry user labels; strip the prefix.
        label_prefix = 'diskman.label_'
        self._custom_labels = dict(
            (tag[len(label_prefix):], value)
            for tag, value in lv_info['lv_tags'].iteritems()
            if tag.startswith(label_prefix)
        )

        self._check_config(hw_info)

    def transfer_state(self, src):
        """Take over status timestamps and fstrim progress from ``src``.

        Calls ``copy_ts`` on every status flag with the matching flag of
        ``src`` and copies the three fstrim bookkeeping fields.

        :param src: another VolumeInfo, typically the previous cache entry.
        """
        for flag in ('_s_ready', '_s_configured', '_s_manageable', '_s_error', '_s_absent'):
            getattr(self, flag).copy_ts(getattr(src, flag))

        self._fstrim_offset, self._fstrim_ts, self._fstrim_end_ts = (
            src._fstrim_offset, src._fstrim_ts, src._fstrim_end_ts)

    def is_manageable(self):
        """Return the current value of the 'manageable' status flag."""
        manageable = self._s_manageable
        return manageable._val

    def long_name(self):
        """Return the volume name followed by its uuid in parentheses."""
        ident = (self._name, self._uuid)
        return '%s(%s)' % ident

    def _decode_mount_flags(self):
        """Load ``_mount_flags`` from the 'diskman.mount_flags' LV tag.

        A value starting with 'base64:' is decoded with the URL-safe base64
        alphabet; any other value (or the empty default) is stored verbatim.
        """
        marker = 'base64:'
        flags = self._lv_info['lv_tags'].get('diskman.mount_flags', '')
        if flags.startswith(marker):
            flags = base64.urlsafe_b64decode(flags[len(marker):])
        self._mount_flags = flags

    def _check_config(self, hw_info):
        """Validate the volume against the cache and LV tags, then set status flags.

        Resolves the parent VG and disk through ``hw_info``, decides whether
        the volume is manageable or a static system volume and, for volumes
        tagged 'diskman=true', parses the access type and mount settings from
        the LV tags.  Block volumes are marked configured right away; mount
        volumes are handed over to ``check_fs_config()``.

        :param hw_info: hardware cache used to look up the parent VG and disk.
        """
        tags = self._lv_info['lv_tags']

        vg = hw_info.lookup_vg_by_name(self._vg_name)
        if not vg:
            self._s_configured.set(False, "VG %s not found in cache" % self._vg_name)
            return
        if not vg._owner:
            self._s_configured.set(False, "Volume belongs to unconfigured VG: %s" % vg._name)
            return

        di = hw_info.lookup_disk_by_id(vg._disk_id)
        if not di:
            self._s_configured.set(False, "Can not find parent disk %s" % vg._disk_id)
            return

        if (tags.get('diskman.sys') == 'true' or
                self._custom_labels.get('diskman.sys') == 'true' or
                self._mnt_path in consts.SYSTEM_MOUNT_POINTS):
            self._is_static = True
            self._s_manageable.set(False, 'Volume is marked as static-system object')

            # Don't account system volumes in 'vpool_capacity_bytes'
            #
            # Diskman won't be able to delete a system lv, since it's not manageable,
            # so it needs to be done via LVM manually. Thus, diskman can't track when
            # the lv was deleted, but there's no need to track deletion and add the
            # system volume size back: after a cache update (which means rebuilding
            # all *Info classes) diskman won't discover the deleted volume and its
            # size won't be subtracted from 'vpool_capacity_bytes'.
            di._vpool_capacity_bytes -= self._size
        else:
            self._s_manageable.set(True)

        # maybe move "di._vpool_capacity_bytes -= self._size" below this check
        self._disk_id = di.disk.id
        if not di._s_ready._val:
            self._s_configured.set(False, "Parent disk %s is not ready" % vg._disk_id)
            return

        if tags.get('diskman', 'false') != 'true':
            self._s_configured.set(False, "Foreign volume, 'diskman' tag not found")
            self._update_status()
            return

        # Parse access_type config
        self._access_type = tags.get('diskman.access_type', 'block')
        if self._access_type == 'mount':
            self._root_owner_uid = int(tags.get('diskman.root_owner_uid', 0))
            self._root_owner_gid = int(tags.get('diskman.root_owner_gid', 0))
            self._fs_type = tags.get('diskman.fs_type', 'ext4')
            # The tag holds an integer flag.  The default must be parseable by
            # int() as well: the former default 'True' raised ValueError for
            # every mount volume that has no 'diskman.fstrim' tag.
            fstrim_str = tags.get('diskman.fstrim', '1')
            self._fstrim_policy = bool(int(fstrim_str))
            mnt_policy = tags.get('diskman.mount_policy', 'NONE')

            try:
                self._mount_policy = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value(mnt_policy)
            except ValueError:
                # Format the message here; the values used to be passed as an
                # extra positional argument of set() instead of being
                # interpolated into the text.
                self._s_configured.set(False, "vol:%s has unknown mount_policy:%s" % (self._name, mnt_policy))
                self._update_status()
                return

            # Update old volume config
            if self._mount_policy == VolumeInfo.MP_NONE:
                def_policy = "DEFAULT"
                log.info('Upgrade volume %s mount_policy, force set it to %s', self._name, def_policy)
                self._mount_policy = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Value(def_policy)
                # Update volume config
                add_tags = ["diskman.mount_policy=%s" % def_policy]
                del_tags = []
                old_policy = tags.get('diskman.mount_policy')
                old_fl = tags.get('diskman.mount_flags')
                # Delete old custom mount_flags if any
                if old_policy:
                    del_tags.append("diskman.mount_policy=%s" % old_policy)
                if old_fl:
                    del_tags.append("diskman.mount_flags=%s" % old_fl)
                    # Drop it from the cached tags too, so that
                    # _decode_mount_flags() below sees no custom flags.
                    del tags['diskman.mount_flags']
                LVM.change_lv_tags(self._name, self._vg_name, add_tags, del_tags)
            self._decode_mount_flags()

        if self._access_type == 'block':
            self._s_configured.set(True)
            self._update_status()
        else:
            self.check_fs_config()

    def get_mnt_policy(self):
        """Return the MPOLICY entry for this volume's fs type and mount policy.

        The numeric ``_mount_policy`` is translated to its enum name, which is
        the second-level key of the ``MPOLICY`` table.
        """
        policy_enum = diskman_pb2.VolumeSpec.MountVolume.MountPolicy
        policy_name = policy_enum.Name(self._mount_policy)
        per_fs = MPOLICY[self._fs_type]
        return per_fs[policy_name]

    def check_mount_state(self):
        """Check the mounted filesystem of this volume and fix what can be fixed.

        Looks the device up in the cached mount table, validates the mount
        options against the applicable mount policy, remounts when the policy
        asks for it and, for non-custom policies or static volumes, re-applies
        the reserved block count with ``tune2fs``.

        :return: False when the error flag is already set, when the policy
            reports the filesystem as unhealthy or when the remount fails;
            True otherwise (including the case of an unmounted volume).
        :raises subprocess.CalledProcessError: when ``tune2fs`` exits non-zero
            (the ``check_call`` below is not guarded).
        """
        if self._s_error._val:
            return False

        m_inf = mount.find_by_dev(self._major, self._minor, self._hw_info._mount_info)
        if m_inf is None:
            # Not mounted: nothing to verify.
            return True

        # A policy registered for the mount point in SYSPART_LIST takes
        # precedence over the volume's own mount policy.
        mp = None
        if m_inf.path in DiskInfo.SYSPART_LIST:
            mp = DiskInfo.SYSPART_LIST[m_inf.path]
        if mp is None:
            # TODO check more fs error here, before mount exit
            if self._mount_policy == VolumeInfo.MP_CUSTOM:
                return True
            mp = self.get_mnt_policy()

        if not mp.is_healthy(m_inf.fs_opts):
            self._s_error.set(True, 'Volume %s fs-error detected' % self._name)
            return False

        all_opts = m_inf.fs_opts + m_inf.opts
        if mp.need_remount(all_opts):
            new_opt_list = mp.make_opt(m_inf.fs_opts)
            new_opts = ','.join(new_opt_list)
            log.info('Remount %s old_opts:%s, new_opts:%s' % (self._name, all_opts, new_opts))
            try:
                # The attempt is counted before the call, so failures are
                # included in the remount counter as well.
                self.dm._stat_remounts += 1
                Disk.remount_fs(m_inf.path, new_opts)
            except OSError as e:
                log.error("remount failed with:%s" % str(e))
                # NOTE(review): the error flag is set to False here although a
                # failure is being recorded - confirm that this is intended.
                self._s_error.set(False, 'Volume %s can not be remounted to target mnt_opts' % self._name)
                self.dm._stat_remount_errors += 1
                return False

        if (self._mount_policy != VolumeInfo.MP_CUSTOM or self._is_static is True):
            # NOTE(review): the sysfs path and tune2fs below are ext4-specific and
            # are used without checking the filesystem type - confirm that only
            # ext4 volumes can reach this branch.
            dev_path = os.path.realpath(self._device_path)
            res_blks = int(read_sysfs_file("/sys/fs/ext4/{}/reserved_clusters".format(os.path.basename(dev_path))))
            stats = os.statvfs(self._mnt_path)

            # 'f_blocks' (from 'statfs()') is actually slightly less (by 'overhead' blocks)
            # than "true" total 'Block count' (reported by 'tune2fs'):
            # https://bb.yandex-team.ru/projects/KERNEL/repos/linux/browse/fs/ext4/super.c?at=refs%2Ftags%2Fv4.19.143-37#5609
            #
            # so, there'll be small discrepancy in blocks, between setting n% reserve via 'tune2fs'
            # and checking how much blocks are reserved using 'f_blocks' from 'statfs()' and
            # calculating "f_blocks * perc_reserved_space / 100" (calculated block count will be less than n%)
            root_res_blks = int(stats.f_blocks * mp.reserved_space / 100)
            # f_bfree - f_bavail is the number of blocks unavailable to
            # unprivileged users; fix the reserve when it differs from the target.
            if (stats.f_bfree - stats.f_bavail) != (root_res_blks + res_blks):
                cmd = ['tune2fs', '-r', str(root_res_blks), dev_path]
                log.info("fixing reserved blocks on '%s' to %s" % (self._device_path, root_res_blks))
                log.debug("exec : %s" % cmd)
                subprocess.check_call(cmd)

        return True

    def check_fs_config(self):
        """Make sure a 'mount' volume carries the configured filesystem.

        * A filesystem was detected: it must match ``_fs_type``, otherwise the
          volume is marked as not configured.
        * No filesystem was detected and the volume is manageable: a new
          filesystem is created with the options of the mount policy.
        * No filesystem and not manageable: marked as not configured.

        The 'ready' flag is recomputed at the end in every case.
        """
        assert (self._access_type == 'mount')
        if self._found_fs_type:
            if self._found_fs_type != self._fs_type:
                # Report the detected type first and the configured one second;
                # the two values used to be swapped in the message.
                self._s_configured.set(False, 'Unexpected filesystem found %s, expect:%s' %
                                       (self._found_fs_type, self._fs_type), errno.EINVAL)
                log.debug('Bad config: vol: %s, msg: %s' % (self._name, self._s_configured.get_msg()))
            else:
                self._s_configured.set(True)
        elif self.is_manageable():
            opts = []
            ext_opts = []
            reserved_space = None
            if self._mount_policy != VolumeInfo.MP_CUSTOM:
                mp = self.get_mnt_policy()
                mkfs_opts = mp.get_mkfs_opts()
                opts = mkfs_opts.get('opts', [])
                # Work on a copy: the per-volume 'root_owner' option must not
                # be appended to a list owned by the policy object, and a
                # policy without 'ext_opts' must not raise KeyError.
                ext_opts = list(mkfs_opts.get('ext_opts', []))

                if self._fs_type == 'ext4':
                    ext_opts.append('root_owner={}:{}'.format(self._root_owner_uid, self._root_owner_gid))
                    reserved_space = mp.reserved_space

            Disk.make_fs(self._device_path, self._fs_type, opts, ext_opts, reserved_space)
            self._s_configured.set(True)
        else:
            self._s_configured.set(False, 'Volume %s has not valid fs and not manageable' % self._name)
        self._update_status()

    def _update_status(self):
        """Recompute the 'ready' flag from the other status flags.

        The first matching condition wins: absent, error, not manageable,
        not configured.  When none applies the volume is ready.
        """
        if self._s_absent._val:
            reason = 'Volume absent'
        elif self._s_error._val:
            reason = 'Volume error'
        elif not self._s_manageable._val:
            reason = 'Volume is not managable'
        elif not self._s_configured._val:
            reason = 'Volume not configured'
        else:
            self._s_ready.set(True)
            return
        self._s_ready.set(False, reason)

    def fill_spec(self, spec):
        """Copy the volume configuration into the protobuf message ``spec``.

        Fills the common fields, then either the 'mount' or the 'block'
        access-type section, the storage class of the parent disk (empty
        string when the disk is not in the cache) and the device numbers.
        """
        spec.name = self._name
        spec.disk_id = self._disk_id
        spec.capacity_bytes = self._size
        spec.labels.update(self._custom_labels)

        if self._access_type != 'mount':
            spec.block.stub = True
        else:
            mnt = spec.mount
            mnt.fs_type = self._fs_type
            mnt.mount_flags = self._mount_flags
            mnt.mount_policy = self._mount_policy
            mnt.root_owner.uid = self._root_owner_uid
            mnt.root_owner.gid = self._root_owner_gid

        parent = self._hw_info.lookup_disk_by_id(self._disk_id)
        if parent is None:
            spec.storage_class = ""
        else:
            spec.storage_class = parent.disk.storage_class

        spec.major = int(self._major)
        spec.minor = int(self._minor)

    def cmp_spec(self, spec):
        """Compare a requested ``spec`` with this volume.

        :return: 0 when the spec matches, otherwise the number of the first
            failed check: 1 name, 2 disk id, 3 capacity, 4 access type,
            5 volume not manageable, 6 fs type, 7 mount flags, 8 mount policy,
            9 root owner uid, 10 root owner gid, 11 label missing on the
            volume, 12 label value differs.  Labels present only on the
            volume are not treated as a mismatch.
        """
        if spec.name != self._name:
            return 1
        if spec.disk_id != self._disk_id:
            return 2
        if spec.capacity_bytes != self._size:
            return 3
        if spec.WhichOneof("access_type") != self._access_type:
            return 4
        if not self.is_manageable():
            return 5

        if self._access_type == 'mount':
            wanted = spec.mount
            if wanted.fs_type != self._fs_type:
                return 6
            if wanted.mount_flags != self._mount_flags:
                return 7
            if wanted.mount_policy != self._mount_policy:
                return 8
            if wanted.root_owner.uid != self._root_owner_uid:
                return 9
            if wanted.root_owner.gid != self._root_owner_gid:
                return 10

        own_labels = self._custom_labels
        for key, value in spec.labels.iteritems():
            if key not in own_labels:
                return 11
            if own_labels[key] != value:
                return 12

        return 0

    def fill_pb_info(self, lv):
        """Fill the protobuf volume message ``lv`` with meta, spec and status.

        The mount path is reported only when the volume is open and mounted.
        """
        lv.meta.id = self._uuid
        lv.meta.creation_time.FromSeconds(int(self._creat_ts))
        self.fill_spec(lv.spec)

        status = lv.status
        if self._is_open and self._mnt_path:
            status.mount_path = self._mnt_path

        flag_targets = (
            (self._s_ready, status.ready),
            (self._s_manageable, status.manageable),
            (self._s_configured, status.configured),
            (self._s_error, status.error),
            (self._s_absent, status.absent),
        )
        for flag, target in flag_targets:
            flag.fill_pb_info(target)

        status.last_scan.FromSeconds(int(self._ts))
        status.last_fstrim.FromSeconds(int(self._fstrim_ts))

        # Fill limits
        limits = self._iolimit
        pb_limits = status.iolimit
        pb_limits.read.ops_per_second = limits._read_iops
        pb_limits.read.bytes_per_second = limits._read_bps
        pb_limits.write.ops_per_second = limits._write_iops
        pb_limits.write.bytes_per_second = limits._write_bps

    def mount(self, path):
        """Mount the volume at ``path``.

        Does nothing when the volume is already mounted at ``path``.  The
        directory is created when missing.  With the CUSTOM policy the stored
        mount flags are used; otherwise the options come from the mount
        policy, which may also request project quota to be switched on.

        :raises VolumeExceptionBusy: the volume is mounted at another path.
        :raises VolumeException: an OSError occurred while mounting, or the
            mount path found afterwards differs from ``path``.
        """
        current = self._mnt_path
        if current:
            if current != path:
                raise VolumeExceptionBusy('Volume is busy by another mount: %s' % self._mnt_path)
            # Already mounted, do nothing
            return

        try:
            if not os.path.exists(path):
                os.makedirs(path)

            if self._mount_policy == VolumeInfo.MP_CUSTOM:
                mnt_opt = self._mount_flags
                want_quota = False
            else:
                mp = self.get_mnt_policy()
                mnt_opt = mp.get_defaults()
                want_quota = mp.project_quota

            Disk.mount_fs(self._device_path, path, self._fs_type, mnt_opt)
            if want_quota:
                mount.project_quota_on(path)
        except OSError as e:
            raise VolumeException(e.args)

        self._mnt_path = mount.find_mount_path(self._major, self._minor)
        self._is_open = True
        # Sanity check
        if self._mnt_path != path:
            raise VolumeException('Found unexpected mount path %s, expect %s' % (self._mnt_path, path))

        parent = self._hw_info.lookup_disk_by_id(self._disk_id)
        if parent:
            parent.wakeup_fstrim()

    def umount(self):
        """Unmount the volume; a volume that is not mounted is left alone.

        After a successful unmount the mount path is looked up again and must
        be empty.  ``_is_open`` is cleared unless an exception is raised.

        :raises VolumeException: an OSError occurred while unmounting, or the
            volume still has a mount path afterwards.
        """
        mounted_at = self._mnt_path
        if mounted_at:
            try:
                Disk.umount_fs(mounted_at)
            except OSError as e:
                raise VolumeException(e.args)

            self._mnt_path = mount.find_mount_path(self._major, self._minor)
            # Sanity check
            if self._mnt_path:
                raise VolumeException('Fail to umount volume, it still has mnt: %s' % self._mnt_path)
        # else: not mounted, nothing to unmount
        self._is_open = False

    def set_iolimit(self, read_iops, read_bps, write_iops,  write_bps):
        """Pass the four IO limits on to the volume's IOLimitInfo object."""
        limits = self._iolimit
        limits.set_read_iops(read_iops)
        limits.set_read_bps(read_bps)
        limits.set_write_iops(write_iops)
        limits.set_write_bps(write_bps)

    def may_fstrim(self):
        """Tell whether fstrim is allowed for this volume at all.

        Requires the fstrim policy to be enabled, the volume to be mounted
        and the detected filesystem to be one of FSTRIM_SUPPORTED_FS.
        """
        if self._fstrim_policy and self._mnt_path:
            return self._found_fs_type in VolumeInfo.FSTRIM_SUPPORTED_FS
        return False

    def need_fstrim(self, ts):
        """Tell whether the volume should be trimmed now.

        :param ts: timestamp to compare with the last-trim timestamp.
        :return: False when fstrim is not allowed, or when no pass is in
            progress (offset is 0) and the last-trim timestamp is newer than
            ``ts``; True otherwise.
        """
        if not self.may_fstrim():
            return False
        no_pass_running = self._fstrim_offset == 0
        trimmed_after_ts = self._fstrim_ts > ts
        # TODO take in to account disk io stats here
        return not (no_pass_running and trimmed_after_ts)

    def fstrim_iter_advance(self, count, update_ts=False):
        """Advance the fstrim offset by ``count`` and report pass completion.

        When the offset grows beyond the volume size the pass is complete:
        the offset is reset to 0 and the timestamps are rotated.  The
        timestamps are also rotated when ``update_ts`` is true.

        :return: True when the pass was completed by this call.
        """
        self._fstrim_offset += count
        done = self._fstrim_offset > self._size
        if done:
            self._fstrim_offset = 0
        if done or update_ts:
            # The previous end time becomes the last-trim time.
            self._fstrim_ts = self._fstrim_end_ts
            self._fstrim_end_ts = time.time()
        return done


class HWInfo(object):
    """In-memory cache of disks, volume groups and volumes.

    Disks are indexed twice: ``_raw_disks`` holds every disk by device path,
    ``_disk`` holds only the disks that have an id.  Volume groups are
    indexed by name and volumes by uuid.  The mount table is read once, when
    the cache object is created.
    """

    def __init__(self):
        # Contains all disks
        self._raw_disks = {}
        # Contains disks with valid partition tables
        self._disk = {}
        self._vg = {}
        self._vol = {}
        self._mount_info = mount.read_mountinfo()

    def add_disk(self, name, dm):
        """Create a DiskInfo for ``name`` and cache it.

        :return: the new DiskInfo, or None for a device of zero size.
        """
        di = DiskInfo(name, dm)
        # Ignore devices with zero size
        if di.disk.size == 0:
            return None
        self._raw_disks[di.disk.device_path] = di
        if di.disk.id:
            self._disk[di.disk.id] = di
        return di

    def del_disk(self, name):
        """Drop the disk cached under ``name`` from both indexes, if present."""
        di = self._raw_disks.get(name)
        if not di:
            return

        del self._raw_disks[name]
        if di.disk.id:
            # Tolerate an id entry that is already gone.
            self._disk.pop(di.disk.id, None)

    def get_disks(self):
        """Return an iterator over all cached disks."""
        return self._raw_disks.itervalues()

    def lookup_disk_by_name(self, name):
        """Return the disk cached under ``name`` or None."""
        return self._raw_disks.get(name, None)

    def lookup_disk_by_id(self, id):
        """Return the disk with the given id or None."""
        return self._disk.get(id, None)

    def add_vg(self, vg_inf):
        """Create a VGroupInfo from an LVM report entry and cache it by name."""
        vg = VGroupInfo(vg_inf, self)
        self._vg[vg._name] = vg
        return vg

    def del_vg(self, name):
        """Drop the volume group ``name`` from the cache, if present."""
        if self._vg.get(name):
            del self._vg[name]

    def get_vgroups(self):
        """Return an iterator over all cached volume groups."""
        return self._vg.itervalues()

    def lookup_vg_by_name(self, name):
        """Return the volume group ``name`` or None."""
        return self._vg.get(name, None)

    def add_volume(self, lv_info, dm):
        """Create a VolumeInfo from an LVM report entry and cache it by uuid."""
        v = VolumeInfo(lv_info, self, dm)
        self._vol[v._uuid] = v
        return v

    def del_volume_by_id(self, id):
        """Drop the volume with the given uuid from the cache, if present."""
        if self._vol.get(id):
            del self._vol[id]

    def del_volume_by_name(self, name):
        """Drop the first cached volume whose name is ``name``, if any.

        The volume objects are inspected, not the dict keys: iterating the
        dict itself yields uuid strings, which have no ``_name`` attribute.
        A snapshot is iterated because the dict is modified inside the loop.
        """
        for v in list(self._vol.values()):
            if v._name == name:
                self.del_volume_by_id(v._uuid)
                break

    def get_volumes(self):
        """Return an iterator over all cached volumes."""
        return self._vol.itervalues()

    def lookup_volume_by_id(self, id):
        """Return the volume with the given uuid or None."""
        return self._vol.get(id, None)

    def lookup_volume_by_name(self, name):
        """Return the first cached volume named ``name`` or None."""
        for v in self.get_volumes():
            if v._name == name:
                return v
        return None

    def lookup_volume_by_mnt_path(self, mnt_path):
        """Return the first cached volume mounted at ``mnt_path`` or None."""
        for v in self.get_volumes():
            if v._mnt_path == mnt_path:
                return v
        return None


class FSTrimJob(threading.Thread):
    """Daemon thread that repeatedly runs fstrim handling for one disk.

    Each iteration calls ``dm.handle_fstrim(disk_id, False)``, which returns
    the time of the next wakeup, or -1 to make the job terminate.
    """

    def __init__(self, dm, disk_id):
        super(FSTrimJob, self).__init__()
        self.daemon = True
        self._dm = dm
        self._disk_id = disk_id
        self._wakeup_event = threading.Event()
        self._stop_event = threading.Event()

    def run(self):
        """Loop until a stop is requested or the disk is reported unsupported."""
        log.info("fstrim-job: {} start job, period:{} sec".format(self._disk_id, self._dm._fstrim_period))
        while True:
            if self._stop_event.is_set():
                break
            self._wakeup_event.clear()
            next_wakeup = self._dm.handle_fstrim(self._disk_id, False)
            if next_wakeup == -1:
                log.info("fstrim-job: {} stop job disk is not supported".format(self._disk_id))
                self.fstrim_ts = 0
                return
            remaining = next_wakeup - time.time()
            if remaining > 0:
                # Sleep until the next run; wakeup() cuts the sleep short.
                self._wakeup_event.wait(remaining)

    def wakeup(self):
        """Interrupt the current sleep of the job."""
        self._wakeup_event.set()

    def notify_stop(self):
        """Ask the job to terminate and wake it up so that it notices."""
        self._stop_event.set()
        self.wakeup()


class FSTrimIoctl(ctypes.Structure):
    """Argument block passed to the FITRIM ioctl.

    The FITRIM request code used by this file (0xc0185879) encodes an
    argument size of 0x18 = 24 bytes, i.e. three 64-bit fields.  Fixed-width
    types are used so that the layout does not depend on the platform's
    ``unsigned long`` size.
    """
    _fields_ = [
        ('start', ctypes.c_uint64),
        ('count', ctypes.c_uint64),
        ('min_len', ctypes.c_uint64),
    ]


def _get_cfg_opt(defmap, config, name):
    if name in config:
        if name in ('enabled_kyber_models', 'enabled_kyber_models_yt'):
            return config[name].split(',')
        elif name == 'enabled_kyber_for_ssd':
            return config[name].lower() in ("yes", "true", "1")
        return config[name]
    return defmap[name]


class QuotaSyncJob(threading.Thread):
    """Daemon thread that runs the 'dqsync' utility once per period."""

    def __init__(self, period):
        super(QuotaSyncJob, self).__init__()
        self.daemon = True
        self._period = period
        self._wakeup_event = threading.Event()
        self._stop_event = threading.Event()

    def run(self):
        """Run dqsync in a loop until a stop is requested.

        A failing run is logged and does not end the loop.  The time spent in
        the run is deducted from the sleep that follows it.
        """
        log.info("quotasync-job: start job, period:{} sec".format(self._period))
        while not self._stop_event.is_set():
            started = time.time()
            self._wakeup_event.clear()
            try:
                run(['/opt/diskmanager/utils/dqsync'])
            except Exception as ex:
                log.error('cmd: quotasync : %s' % str(ex))
            idle = self._period - (time.time() - started)
            if idle > 0:
                self._wakeup_event.wait(idle)

    def wakeup(self):
        """Interrupt the current sleep of the job."""
        self._wakeup_event.set()

    def notify_stop(self):
        """Ask the job to terminate and wake it up so that it notices."""
        self._stop_event.set()
        self.wakeup()


class DeadlockWatchdog(threading.Thread):
    """Daemon thread that reports a lock which stays held for too long.

    In every round the watchdog tries, about once a second, to take the lock
    without blocking.  When that does not succeed within ``timeout`` seconds
    it logs an error together with the stack of every thread.  After a
    successful probe it sleeps for ``timeout`` seconds before the next round.
    """

    def __init__(self, lock, timeout, msg=""):
        """
        :param lock: lock object to watch; must support ``acquire(False)``.
        :param timeout: seconds the lock may stay unavailable before a report.
        :param msg: name of the lock used in log messages.
        """
        threading.Thread.__init__(self)
        self._lock = lock
        self._timeout = timeout
        self._msg = msg
        self._wakeup_event = threading.Event()
        self._stop_event = threading.Event()
        self.daemon = True

    def run(self):
        """Probe the lock round by round until a stop is requested."""
        log.info("Start watchdog %s", self._msg)
        while not self._stop_event.is_set():
            self._wakeup_event.clear()
            got_lock = self._probe_lock(time.time() + self._timeout)
            if self._stop_event.is_set():
                # A probe cut short by a stop request is not a deadlock, so do
                # not emit a false report (and a stack dump) on shutdown.
                break
            if got_lock:
                self._wakeup_event.wait(self._timeout)
            else:
                log.error("Fail to acquire lock %s for more than %d seconds",  self._msg, self._timeout)
                log.error("=== Dump stack traces of all thread ==")
                for s in self.collect_stacks():
                    log.error(s)

    def _probe_lock(self, deadline):
        """Try to take and release the lock until ``deadline``; True on success."""
        while time.time() < deadline and not self._stop_event.is_set():
            if self._lock.acquire(False):
                self._lock.release()
                return True
            # Pause for up to a second between attempts; a stop request ends
            # the pause immediately.
            self._stop_event.wait(1)
        return False

    def collect_stacks(self):
        """Return the current stack of every thread as a list of text lines.

        Each thread contributes a '# Thread: name(id)' header followed by one
        'File: ...' line per stack frame.
        """
        id2name = {th.ident: th.name for th in threading.enumerate()}
        code = []
        for threadId, stack in sys._current_frames().items():
            code.append("# Thread: %s(%d)" % (id2name.get(threadId, ""), threadId))
            for filename, lineno, name, line in traceback.extract_stack(stack):
                code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
        return code

    def wakeup(self):
        """Interrupt the sleep between two probe rounds."""
        self._wakeup_event.set()

    def notify_stop(self):
        """Ask the watchdog to terminate and wake it up so that it notices."""
        self._stop_event.set()
        self.wakeup()


# Lock hierarchy
# ->_lock
#   ->_mnt_lock
class DiskManager(object):
    """Service object that keeps a cache of disks/volumes and serves RPC handlers.

    The handler methods take ``(request, context)`` and report failures through
    ``context.abort`` with a ``grpc.StatusCode``.
    """
    # Built-in option values, passed as the defaults map to the config lookups
    # in __init__ (presumably used when the supplied config lacks the key --
    # the lookup helper is defined elsewhere, confirm there).
    _DEF_CONFIG = {
        "ignore_dev": ['loop', 'dm', 'ram', 'sr', 'fd', 'vd', 'nbd'],
        "yasm_url": consts.DEFAULT_YASM_PROD_URL,
        "stat_push_period": consts.DEFAULT_YASM_PUSH_PERIOD,
        "quota_sync_period": consts.DEFAULT_QUOTA_SYNC_PERIOD,
        "fstrim_period": consts.DEFAULT_FSTRIM_PERIOD,
        "format_new_disk": False,
        "format_new_disk_with_grub": False,
        "default_io_scheduler": consts.DEFAULT_IO_SCHEDULER,
        "default_yp_export_io_limits_conf": consts.DEFAULT_YP_EXPORT_IO_LIMITS_CONF,
        "enabled_kyber_models_yt": consts.ENABLED_KYBER_MODELS_YT,
        "enabled_kyber_models": consts.ENABLED_KYBER_MODELS,
        "enabled_kyber_for_ssd": consts.ENABLED_KYBER_FOR_SSD
    }

    def __init__(self, config, cache_ttl=100, hot_swap_socket=""):
        """Read the configuration, build the first hardware cache and start helper threads.

        Args:
            config: mapping with user supplied options; missing options are
                looked up in ``_DEF_CONFIG``.
            cache_ttl: age in seconds after which the hardware cache is rebuilt.
            hot_swap_socket: value reported as container path by
                ``get_hot_swap_creds``; empty string disables that entry.
        """
        defaults = DiskManager._DEF_CONFIG

        def opt(name):
            # Single place for the "config value or default" lookup.
            return _get_cfg_opt(defaults, config, name)

        self._id = str(uuid.uuid4())
        self._start_ts = time.time()
        self._lock = threading.Lock()
        self._mnt_lock = threading.Lock()
        self.exclude = DiskManager._obtain_ignore_devices_from_config(config)
        self._cache_ts = 0
        self._cache_ttl = cache_ttl
        self._format_new_disk = opt("format_new_disk")
        self._format_new_disk_with_grub = opt("format_new_disk_with_grub")
        self._default_io_scheduler = DiskManager._get_iosched_cfg_opt(defaults, config, "default_io_scheduler")
        self.iosched_policy = get_iosched_policy(_kernel_release, self._default_io_scheduler)
        self._stat_url = opt("yasm_url")
        self._stat_period = opt("stat_push_period")
        self._stat_ts = time.time()
        self._fstrim_stat_map = {}
        self._stat_remounts = 0
        self._stat_remount_errors = 0
        self._fstrim_period = opt("fstrim_period")
        log.info('Init DiskManager service build: %s pid: %d id: %s',
                 sv.svn_revision(), os.getpid(), self._id)
        log.info('dm config: {}'.format(config))
        self.yp_io_limits_config = opt("default_yp_export_io_limits_conf")
        self.yp_io_limits = limit.IOLimitsForYPExport(self.yp_io_limits_config)
        self.enabled_kyber_models_yt = opt("enabled_kyber_models_yt")
        self.enabled_kyber_models = opt("enabled_kyber_models")
        self.enabled_kyber_for_ssd = opt("enabled_kyber_for_ssd")

        # The first cache build happens before any background thread exists.
        self._hw_cache = HWInfo()
        self._do_update_cache()

        self._watchdog = DeadlockWatchdog(self._lock, 30, "DiskManager._lock")
        self._watchdog.start()

        self._quota_sync_period = opt("quota_sync_period")
        self._quota_sync_job = QuotaSyncJob(self._quota_sync_period)
        self._quota_sync_job.start()

        self._hot_swap_socket = hot_swap_socket

    @staticmethod
    def _obtain_ignore_devices_from_config(config):
        """Return the list of device-name prefixes that must be ignored.

        The list comes from the ``ignore_dev`` option; every entry that is also
        listed under ``include_dev`` in *config* is dropped from it.
        """
        ignored = _get_cfg_opt(DiskManager._DEF_CONFIG, config, "ignore_dev") or []
        if "include_dev" not in config:
            return ignored
        return [prefix for prefix in ignored if prefix not in config["include_dev"]]

    @staticmethod
    def _get_iosched_cfg_opt(defmap, config, name):
        """Return the io scheduler configured under *name*, validated.

        The value from *config* is used only when it is one of
        ``consts.AVAIL_IO_SCHEDULERS``; otherwise an error is logged and the
        default from *defmap* is returned.
        """
        fallback = defmap[name]
        assert(fallback in consts.AVAIL_IO_SCHEDULERS)

        if name not in config:
            return fallback

        requested = config[name]
        if requested in consts.AVAIL_IO_SCHEDULERS:
            return requested

        log.error('provided io scheduler \"{}\" is not one of the available: {}, falling back to default \"{}\"'.format(requested, consts.AVAIL_IO_SCHEDULERS, fallback))
        return fallback

    def _query_hwinfo(self):
        """Populate ``self._hw_cache`` with disks, volume groups and logical volumes.

        Disks whose name starts with one of the ``self.exclude`` prefixes are
        skipped.
        """
        for dev_name in Disk.list_disks():
            # TODO May be it is reasonable to use regexp here
            if any(dev_name.startswith(prefix) for prefix in self.exclude):
                continue
            self._hw_cache.add_disk(dev_name, self)

        for vg_inf in LVM.list_vg():
            self._hw_cache.add_vg(vg_inf)

        for lv_inf in LVM.list_lv():
            self._hw_cache.add_volume(lv_inf, self)

    def _do_update_cache(self):
        """Rebuild the hardware cache and re-ensure the RTC directory layout.

        A fresh ``HWInfo`` is filled by ``_query_hwinfo()``.  For every disk and
        volume that was already present in the previous cache (matched by id)
        ``transfer_state()`` is called with the old entry.  While walking the
        new cache a ``mount path -> storage class`` map is collected and then
        used to decide whether the ``/ssd`` alias and the ``/ssd`` tree have to
        be ensured.

        Raises:
            OSError: when one of the directory/symlink ensure helpers reports
                an error.
        """
        old_cache = self._hw_cache
        self._hw_cache = HWInfo()
        self._query_hwinfo()
        # HOSTMAN-691 directory structure crutch
        mounts_storage_classes = {}
        # Transfer old state to new state
        self.yp_io_limits = limit.IOLimitsForYPExport(cache=self.yp_io_limits)
        for di in self._hw_cache.get_disks():
            old_di = old_cache.lookup_disk_by_id(di.disk.id)
            if old_di:
                di.transfer_state(old_di)
            di.check_ioscheduler()
            di.config_fstrim()
            if not di.check_mount_state():
                di._s_configured.set(False, 'Mount check failed')
            # check if we have /ssd mount or /place is ssd
            if di.disk.parts:
                d_parts = di.disk.parts
            elif di.disk.virt_part is not None:
                d_parts = [di.disk.virt_part]
            else:
                d_parts = []
            for p in d_parts:
                if p.mnt_path:
                    mounts_storage_classes[p.mnt_path] = di.disk.storage_class
            di.check_samsung_io_limits()
            di._yp_io_limits = self.yp_io_limits.get(di.disk)

        # Stop background threads of every entry of the old cache.
        # NOTE(review): presumably whatever must survive has already been
        # handed over by transfer_state() above -- confirm.
        for di in old_cache.get_disks():
            di.stop_bg_threads(wait=True)

        for vi in self._hw_cache.get_volumes():
            old_vi = old_cache.lookup_volume_by_id(vi._uuid)
            if old_vi:
                vi.transfer_state(old_vi)
            if not vi.check_mount_state():
                vi._s_configured.set(False, 'Mount check failed')

            # A mounted volume gets the storage class of the disk that backs
            # its volume group.
            if vi._mnt_path:
                vg = self._hw_cache.lookup_vg_by_name(vi._vg_name)
                if vg:
                    pd = self._hw_cache.lookup_disk_by_id(vg._disk_id)
                    if pd:
                        mounts_storage_classes[vi._mnt_path] = pd.disk.storage_class

        self._cache_ts = time.time()
        log.debug('hw_cache_ts: {}'.format(self._cache_ts))

        # DISKMAN-72: create alias for /ssd
        log.debug("Discovered mounts storage classes: {}".format(mounts_storage_classes))
        if mounts_storage_classes.get('/place') in consts.STORAGE_NON_ROTATIONAL and not mounts_storage_classes.get('/ssd'):
            log.debug('/place is ssd and there is no /ssd mount')
            err = dirutil.ensure_dir('/place/ssd', 1049, 1049, 0o755)
            if err:
                log.error('Failed to ensure /place/ssd: {}'.format(err))
                raise OSError(err)
            log.debug('Sucessfully ensured /place/ssd dir')
            err = dirutil.ensure_link('/ssd', '/place/ssd')
            if err:
                log.error('Failed to ensure /ssd -> /place/ssd symlink: {}'.format(err))
                raise OSError(err)
            log.debug('Sucessfully ensured /ssd -> /place/ssd symlink')

        # HOSTMAN-691: ensure rtc-related directory and symlink structure
        err = _ensure_rtc_place_tree()
        if err:
            log.error("Failed to ensure RTC /place tree: {}".format(err))
            raise OSError(err)
        else:
            log.debug("Successfully ensured /place directory tree")
        have_ssd = mounts_storage_classes.get('/place') in consts.STORAGE_NON_ROTATIONAL or mounts_storage_classes.get(
            '/ssd') is not None
        if have_ssd:
            # Log only when the /ssd tree is really going to be ensured.
            log.debug("System has /ssd. Ensuring /ssd directory tree")
            err = _ensure_rtc_ssd_tree()
            if err:
                log.error("Failed to ensure RTC /ssd tree: {}".format(err))
                raise OSError(err)
            else:
                log.debug("Successfully ensured /ssd directory tree")
        log.debug("Ensuring /basesearch structure")
        err = _ensure_rtc_basesearch_tree(have_ssd)
        if err:
            log.error('Failed to ensure /basesearch tree: {}'.format(err))
            raise OSError(err)
        else:
            log.debug("Successfully ensured /basesearch directory tree")

    def __do_format_new_disk(self, disk_id):
        """Partition a new disk and create the diskman volume group on it.

        Does nothing when *disk_id* is not in the cache.  The GPT layout is:
          p1: starts at 1 MiB, 14 MiB long -- bios partition
          p2: starts at 16 MiB, ends 1 MiB before the 1 GiB mark -- reserved
          p3: starts at 1 GiB, runs to the last 1 MiB aligned boundary of the
              disk -- LVM physical volume
        Grub is installed afterwards when ``_format_new_disk_with_grub`` is set.

        Raises:
            Exception: the disk is no longer in the cache after partitioning.
        """
        di = self._hw_cache.lookup_disk_by_id(disk_id)
        if not di:
            return
        dev_path = di.disk.device_path
        log.info('autoformat disk: {} dev:{}'.format(disk_id, dev_path))

        Disk.make_gpt(dev_path)
        mib = 1024 ** 2
        srv_start = 16 * mib
        # (partition number, start bytes, length bytes, PART_TYPES key, label)
        layout = (
            (1, mib, 14 * mib, 'bios', 'bios'),
            (2, srv_start, BYTES_IN_GB - srv_start - mib, 'linux_filesystem', 'diskman_srv'),
            (3, BYTES_IN_GB, (((di.disk.size) / mib) * mib) - BYTES_IN_GB, 'linux_lvm', 'diskman_pv'),
        )
        for number, start, length, type_key, label in layout:
            # add_gpt_part gets start/length divided by 512
            Disk.add_gpt_part(dev_path, number, start / 512, length / 512, Disk.PART_TYPES[type_key], label)

        # Force cache update after partition update
        self._do_update_cache()
        di = self._hw_cache.lookup_disk_by_id(disk_id)
        if not di:
            log.error('Disk {} disappeared during autoformat dev:{}'.format(disk_id, dev_path))
            raise Exception('Disk id:{} {} absent'.format(disk_id, dev_path))

        try:
            vg_name = 'diskman-vg-' + di.disk.id
            fstrim_name = diskman_pb2.DiskSpec.FSTrimPolicy.Name(di._fstrim_policy)
            tags = ['diskman=true', 'diskman.fstrim=%s' % fstrim_name]
            # The last partition is the LVM one created above
            pv_name = di.disk.parts[-1].device_path
            LVM.create_vg(vg_name, [pv_name], tags)
        except OSError as e:
            log.error('Vg creation error on Disk {} {}, err:{} '.format(disk_id, dev_path, str(e)))
            di._s_error.set(True, 'Fail to autoformat, vg creation falure')
            self._do_update_cache()
            return

        if self._format_new_disk_with_grub:
            try:
                Disk.grub_install(dev_path)
            except OSError as e:
                log.error('Grub install failed for disk {} {}, err:{} '.format(disk_id, dev_path, str(e)))
                di._s_error.set(True, 'Grub install failed')
        # Force cache update after partition update
        self._do_update_cache()

    def _do_format_new_disk(self, disk_id):
        """Auto-format *disk_id*, converting any failure into a disk error state.

        Every exception raised by the formatting routine is logged and, when
        the disk is still present in the cache, recorded in its ``_s_error``
        state; nothing is propagated to the caller.
        """
        try:
            self.__do_format_new_disk(disk_id)
        except Exception as e:
            # "as" form: valid on Python 2.6+ and 3, same as the rest of the file
            log.error('Fail to autoformat new disk {}, err{}'.format(disk_id, str(e)))
            di = self._hw_cache.lookup_disk_by_id(disk_id)
            if di:
                di._s_error.set(True, 'Fail to autoformat')

    def _check_new_disks(self):
        """Auto-format, one at a time, every disk that reports ``may_import_as_new()``.

        No-op unless the ``format_new_disk`` option is enabled.  The cache is
        re-scanned after each formatted disk.
        """
        if not self._format_new_disk:
            return
        while True:
            candidate = next(
                (di for di in self._hw_cache.get_disks() if di.may_import_as_new()),
                None)
            if candidate is None:
                return
            disk_id = candidate.disk.id
            log.info('New disk found: {} '.format(disk_id))
            if not disk_id:
                return
            self._do_format_new_disk(disk_id)

    def _check_cache(self, force=False):
        """Refresh the hardware cache when it is older than the TTL or *force* is set.

        A refresh is followed by the new-disk auto-format check.
        """
        expired = time.time() - self._cache_ts > self._cache_ttl
        if not (force or expired):
            return
        self._do_update_cache()
        self._check_new_disks()

    @logrpc
    @synchronized('_lock')
    def list_disks(self, request, context):
        """Return info about the requested disks, or about all disks.

        When ``request.disk_ids`` is empty every cached disk is reported;
        otherwise each id is looked up and an unknown id aborts the call with
        INVALID_ARGUMENT.
        """
        self._check_cache()
        response = diskman_pb2.ListDisksResponse()

        if request.disk_ids:
            for disk_id in request.disk_ids:
                disk_info = self._hw_cache.lookup_disk_by_id(disk_id)
                if not disk_info:
                    context.abort(EINVAL, 'Can not find disk with id "%s"' % disk_id)
                entry = response.disks.add()
                disk_info.fill_pb_info(entry)
            return response

        for disk_info in self._hw_cache.get_disks():
            entry = response.disks.add()
            disk_info.fill_pb_info(entry)
        return response

    @logrpc
    @synchronized('_lock')
    def format_disk(self, request, context):
        """Re-partition a disk according to ``request.partitions``.

        The partition specs are validated first (ascending, non-overlapping,
        inside the disk, at most one partition marked as ``volume_source``).
        An already configured disk is only re-formatted when ``request.force``
        is set; its volume group is deleted first.  After the GPT is written
        and the cache refreshed, a volume group is created on the
        ``volume_source`` partition, if any.

        Failures are reported through ``context.abort``:
          INVALID_ARGUMENT    -- bad request, unknown disk, bad partition table
          FAILED_PRECONDITION -- disk already configured and force not set
          ABORTED             -- an LVM command failed

        Returns:
            FormatDiskResponse with the refreshed disk info.
        """
        self._check_cache()
        req_id = logger.get_req_id(request, context)
        response = diskman_pb2.FormatDiskResponse()
        if not request.disk_id:
            context.abort(EINVAL, 'disk_id is mandatory')
        di = self._hw_cache.lookup_disk_by_id(request.disk_id)
        if not di:
            context.abort(EINVAL, 'Can not find disk %s' % request.disk_id)
        if len(request.partitions) == 0:
            context.abort(EINVAL, 'PartitionSpec is mandatory')

        log.info("[%s] FormadDisk %s" % (req_id, di.long_name()))

        # Validate PartitionSpec
        pv_idx = 0  # 1-based index of the partition used as LVM PV, 0 = none
        end = 0
        for part_idx, p in enumerate(request.partitions, 1):
            if end > p.start_bytes:
                context.abort(EINVAL, 'Partition %d overflow with others' % part_idx)
            end = p.start_bytes + p.size_bytes
            if end > di.disk.size:
                context.abort(EINVAL, 'Partition %d beyond end of disk' % part_idx)
            if p.volume_source:
                if pv_idx:
                    context.abort(EINVAL, 'Multiple PV is not supported for a single disk')
                    return
                pv_idx = part_idx

        if di._s_configured._val:
            if not request.force:
                context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                              'Disk is configured already, use force mode to override')
            try:
                LVM.delete_vg(di._vg_name, True)
            except OSError as e:
                context.abort(grpc.StatusCode.ABORTED, 'Force format failed: LVM.cmd failed %s' % str(e))

        dsk_path = di.disk.device_path
        Disk.make_gpt(dsk_path)
        for part_idx, p in enumerate(request.partitions, 1):
            # Byte offsets are converted to 512-byte units; explicit floor division
            Disk.add_gpt_part(dsk_path, part_idx, p.start_bytes // 512, p.size_bytes // 512, p.type)

        # Force cache update after partition update
        self._check_cache(force=True)
        di = self._hw_cache.lookup_disk_by_id(request.disk_id)
        if not di:
            context.abort(EINVAL, 'Can not find disk %s' % request.disk_id)
        if pv_idx > 0:
            # The PV partition must be present in the re-read partition table.
            # (The previous check compared the list itself with an int, which
            # never triggers on Python 2.)
            if len(di.disk.parts) < pv_idx:
                context.abort(EINVAL, 'Bad partition table found %s' % request.disk_id)

            vg_name = 'diskman-vg-' + di.disk.id
            tags = ['diskman=true']
            # NOTE(review): this stores the policy on the manager itself, not on
            # the disk entry; kept as is because readers of the attribute are
            # not visible from here -- confirm whether a local would do.
            self._fstrim_policy = request.fstrim
            tags.append('diskman.fstrim=%s' %
                        diskman_pb2.DiskSpec.FSTrimPolicy.Name(self._fstrim_policy))
            for k, v in request.labels.items():
                tags.append("diskman.label_%s=%s" % (k, v))
            pv_name = di.disk.parts[pv_idx - 1].device_path
            try:
                LVM.create_vg(vg_name, [pv_name], tags)
            except OSError as e:
                context.abort(grpc.StatusCode.ABORTED, 'LVM.cmd failed %s' % str(e))
            # Force cache update one more time to update disk status to configured
            self._check_cache(force=True)
            di = self._hw_cache.lookup_disk_by_id(request.disk_id)
            if not di:
                context.abort(EINVAL, 'Can not find disk %s' % request.disk_id)
        di.fill_pb_info(response.disk)
        return response

    @logrpc
    @synchronized('_lock')
    def list_volumes(self, request, context):
        """Return info about volumes selected by id and/or mount path.

        With neither ``volume_ids`` nor ``mount_paths`` in the request every
        cached volume is reported.  Otherwise the ids are resolved first, then
        the mount paths; an unknown id or path aborts with INVALID_ARGUMENT.
        """
        self._check_cache()
        response = diskman_pb2.ListVolumesResponse()

        if not (request.volume_ids or request.mount_paths):
            for vol in self._hw_cache.get_volumes():
                entry = response.volumes.add()
                vol.fill_pb_info(entry)
            return response

        for volume_id in request.volume_ids:
            vol = self._hw_cache.lookup_volume_by_id(volume_id)
            if not vol:
                context.abort(EINVAL, 'Can not find volume with id "%s"' % volume_id)
            entry = response.volumes.add()
            vol.fill_pb_info(entry)

        for mnt_path in request.mount_paths:
            vol = self._hw_cache.lookup_volume_by_mnt_path(mnt_path)
            if not vol:
                context.abort(EINVAL, 'Can not find volume with mount_path "%s"' % mnt_path)
            entry = response.volumes.add()
            vol.fill_pb_info(entry)

        return response

    def validate_vol_spec(self, spec, context):
        """Validate a volume spec; abort the RPC with INVALID_ARGUMENT when it is bad.

        Checks that name, disk_id and a non-zero capacity are given and, for a
        non-block volume, that mount policy and fs type are set and that custom
        mount flags are only used with the custom mount policy.

        Args:
            spec: volume spec message from the request.
            context: RPC context used to abort the call.
        """
        if not spec.name:
            context.abort(EINVAL, 'name is required')
        if not spec.disk_id:
            context.abort(EINVAL, 'disk_id required')
        if spec.capacity_bytes == 0:
            # Report the field that was actually checked
            context.abort(EINVAL, 'Bad size:% d' % spec.capacity_bytes)
        # TODO check that size will not exceed disk size here
        if spec.WhichOneof("access_type") != 'block':
            if spec.mount.mount_policy == VolumeInfo.MP_NONE:
                context.abort(EINVAL, 'spec.mount.mount_policy required')
            if not spec.mount.fs_type:
                context.abort(EINVAL, 'spec.mount.fs_type required')
            if spec.mount.mount_flags and spec.mount.mount_policy != VolumeInfo.MP_CUSTOM:
                context.abort(EINVAL, 'spec.mount.mount_flags is conflict with spec.mount.mount_policy %s' %
                              spec.mount.mount_policy)
        found = {}
        for k, v in spec.labels.items():
            if k in found:
                context.abort(EINVAL, 'spec.labels duplicate name:%s' % k)
            found[k] = v

    @logrpc
    @require('volume_spec')
    @synchronized('_lock')
    def create_volume(self, request, context):
        """Create a logical volume described by ``request.volume_spec``.

        If a volume with the same name already exists, its info is returned
        when ``cmp_spec`` reports no difference, otherwise the call aborts with
        ALREADY_EXISTS.  The parent disk must be known, ready, have a volume
        group and enough allocatable space, and the capacity must be a multiple
        of the disk extent size.  The spec is persisted as LVM tags.

        Returns:
            CreateVolumeResponse with the info of the (new or existing) volume.
        """
        self._check_cache()
        spec = request.volume_spec
        self.validate_vol_spec(spec, context)
        req_id = logger.get_req_id(request, context)
        response = diskman_pb2.CreateVolumeResponse()

        # Check that we already have volume with that name
        existing = self._hw_cache.lookup_volume_by_name(spec.name)
        if existing:
            if not existing.cmp_spec(spec):
                # Volume with that spec already exists, do nothing
                existing.fill_pb_info(response.volume)
                return response
            context.abort(grpc.StatusCode.ALREADY_EXISTS,
                          'Volume already exists, but spec does not match, id:% s' % existing._uuid)

        # Volume not exists yet.
        disk = self._hw_cache.lookup_disk_by_id(spec.disk_id)
        if not disk:
            context.abort(grpc.StatusCode.NOT_FOUND,
                          'Parent disk not found')
            return
        if not disk._s_ready._val:
            context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                          'Parent disk is not ready')
        if not disk._vg_name:
            context.abort(grpc.StatusCode.INTERNAL, 'Disk has no valid VG')
        if spec.capacity_bytes % disk._extent_size:
            context.abort(grpc.StatusCode.INVALID_ARGUMENT, 'VolumeSpec.capacity_bytes is not aligned to DiskSpec.extent_size_bytes')
        if disk._allocatable_bytes < spec.capacity_bytes:
            context.abort(grpc.StatusCode.RESOURCE_EXHAUSTED, 'Disk does not have enough space')

        log.info("[%s] CreateVolume %s on %s" % (req_id, spec.name, disk.long_name()))

        tags = ['diskman=true']
        tags.extend("diskman.label_%s=%s" % (key, val) for key, val in spec.labels.iteritems())

        if spec.WhichOneof("access_type") == 'block':
            tags.append('diskman.access_type=block')
        else:
            mnt_spec = spec.mount
            tags.append('diskman.access_type=mount')
            tags.append('diskman.fs_type=%s' % mnt_spec.fs_type)
            policy_name = diskman_pb2.VolumeSpec.MountVolume.MountPolicy.Name(mnt_spec.mount_policy)
            tags.append('diskman.mount_policy=%s' % policy_name)
            tags.append('diskman.root_owner_uid=%d' % mnt_spec.root_owner.uid)
            tags.append('diskman.root_owner_gid=%d' % mnt_spec.root_owner.gid)
            if mnt_spec.mount_flags:
                flags_tag = mnt_spec.mount_flags
                # Custom mount options may contain characters that are not
                # allowed in lvm tags, so they are stored base64-encoded
                if mnt_spec.mount_policy == VolumeInfo.MP_CUSTOM:
                    flags_tag = 'base64:' + base64.urlsafe_b64encode(mnt_spec.mount_flags)
                tags.append('diskman.mount_flags=%s' % flags_tag)
            tags.append('diskman.fstrim=%d' % mnt_spec.fstrim)

        try:
            LVM.create_lv(spec.name, spec.capacity_bytes, disk._vg_name, tags)
            lv_list = LVM.list_lv(name=disk._vg_name + '/' + spec.name)
            assert (len(lv_list) == 1)
            lv_info = lv_list[0]
        except OSError as e:
            context.abort(grpc.StatusCode.ABORTED, 'LVM.cmd failed %s' % str(e))

        # Success, new volume created
        created = self._hw_cache.add_volume(lv_info, self)
        created.fill_pb_info(response.volume)
        self._check_cache(force=True)
        return response

    @logrpc
    @require('volume_id')
    @synchronized('_lock')
    def delete_volume(self, request, context):
        """Delete the logical volume ``request.volume_id``.

        An unknown volume id is treated as success.  A volume whose
        ``_s_configured`` state is not set aborts the call with
        FAILED_PRECONDITION; an LVM failure aborts with ABORTED.

        NOTE(review): the returned message is a CreateVolumeResponse -- confirm
        this is the intended response type for this call.
        """
        self._check_cache()
        req_id = logger.get_req_id(request, context)
        resp = diskman_pb2.CreateVolumeResponse()

        target = self._hw_cache.lookup_volume_by_id(request.volume_id)
        if not target:
            # Nothing to do
            return resp

        if not target._s_configured._val:
            context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                          'Volume is not owned by service: %s' % target._s_configured.get_msg())

        try:
            log.info("[%s] DeleteVolume %s" % (req_id, target.long_name()))
            LVM.delete_lv(target._name, target._vg_name, True)
        except OSError as e:
            context.abort(grpc.StatusCode.ABORTED, 'LVM.delete_lv failed : %s' % str(e))

        # Drop the entry right away, then rebuild the whole cache
        self._hw_cache.del_volume_by_id(request.volume_id)
        self._do_update_cache()
        return resp

    @logrpc
    @require('volume_id', 'mount_path')
    @synchronized('_lock')
    @synchronized('_mnt_lock')
    def mount_volume(self, request, context):
        """Mount volume ``request.volume_id`` at ``request.mount_path``.

        Aborts with NOT_FOUND for an unknown volume and with
        FAILED_PRECONDITION for a relative mount path or a volume that is not
        ready.  The cache is force-refreshed after the mount.
        """
        self._check_cache()
        req_id = logger.get_req_id(request, context)
        response = diskman_pb2.MountVolumeResponse()

        volume = self._hw_cache.lookup_volume_by_id(request.volume_id)
        if not volume:
            context.abort(grpc.StatusCode.NOT_FOUND,
                          'Volume %s does not exist' % request.volume_id)

        target_path = request.mount_path
        if not os.path.isabs(target_path):
            context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                          'MountVolumeResponse.mount_path must be absolute_path, given {}'.format(target_path))

        if not volume._s_ready._val:
            context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                          'Volume is not ready: %s' % volume._s_ready.get_msg())

        log.info("[%s] MountVolume %s at %s" % (req_id, volume.long_name(), target_path))
        volume.mount(target_path)
        self._check_cache(force=True)
        return response

    @logrpc
    @require('volume_id')
    @synchronized('_lock')
    @synchronized('_mnt_lock')
    def umount_volume(self, request, context):
        """Unmount volume ``request.volume_id``.

        Aborts with NOT_FOUND when the volume is unknown.  The cache is
        force-refreshed after the unmount.
        """
        self._check_cache()
        req_id = logger.get_req_id(request, context)
        response = diskman_pb2.UmountVolumeResponse()
        v = self._hw_cache.lookup_volume_by_id(request.volume_id)
        if not v:
            # Report the requested id (previously the builtin ``id`` function
            # was formatted into the message)
            context.abort(grpc.StatusCode.NOT_FOUND, 'Volume %s does not exist' % request.volume_id)
        log.info("[%s] UmountVolume %s at %s" % (req_id, v.long_name(), v._mnt_path))
        v.umount()
        self._check_cache(force=True)
        return response

    @logrpc
    @require('id')
    @synchronized('_lock')
    def set_iolimit_volume(self, request, context):
        """Apply read/write IO limits from ``request.iolimit`` to volume ``request.id``.

        Aborts with NOT_FOUND when the volume is unknown and with
        FAILED_PRECONDITION when it is not ready.
        """
        self._check_cache()
        req_id = logger.get_req_id(request, context)
        response = diskman_pb2.SetIOLimitResponse()

        v = self._hw_cache.lookup_volume_by_id(request.id)
        if not v:
            # The request carries the volume id in the ``id`` field
            context.abort(grpc.StatusCode.NOT_FOUND,
                          'Volume %s does not exist' % request.id)
        lim = request.iolimit
        if not v._s_ready._val:
            context.abort(grpc.StatusCode.FAILED_PRECONDITION,
                          'Volume is not ready: %s' % v._s_ready.get_msg())
        # Logged for every accepted request (this statement used to sit inside
        # the aborting branch above and was never reached)
        log.info("[%s] SetIOLimitVolume %s at [%d/%d], [%d/%d]" % (req_id,
                                                                   v.long_name(),
                                                                   lim.read.ops_per_second,
                                                                   lim.read.bytes_per_second,
                                                                   lim.write.ops_per_second,
                                                                   lim.write.bytes_per_second))
        v.set_iolimit(lim.read.ops_per_second,
                      lim.read.bytes_per_second,
                      lim.write.ops_per_second,
                      lim.write.bytes_per_second)
        return response

    @logrpc
    @synchronized('_lock')
    def get_yt_devs(self, request, context):
        """Return mount path and device path of every disk partition mounted under ``/yt/``."""
        self._check_cache()
        response = diskman_pb2.GetYTMountedDevicesResponse()

        for disk_info in self._hw_cache.get_disks():
            for part in disk_info.disk.parts:
                if not part.mnt_path:
                    continue
                if not part.mnt_path.startswith('/yt/'):
                    continue
                entry = response.mounted_devices.add()
                entry.mount_path = part.mnt_path
                entry.device_path = part.device_path

        return response

    @logrpc
    @synchronized('_lock')
    def get_hot_swap_creds(self, request, context):
        """Return the mount entry for the hot swap socket, if one is configured.

        The response has no mounts when ``_hot_swap_socket`` is empty.
        """
        self._check_cache()
        response = diskman_pb2.GetHotSwapCredsResponse()
        if not self._hot_swap_socket:
            return response

        sock_mount = response.mounts.add()
        sock_mount.container_path = self._hot_swap_socket
        sock_mount.host_path = consts.DEFAULT_SERVER_UNIX_SOCK
        sock_mount.read_only = False
        return response

    # Called from background threads
    # Locking: drops self._lock internally, acquires self._mnt_lock
    # Return: next timestamp
    def _handle_fstrim(self, disk_id):
        """Run one FITRIM step for *disk_id* and tell the caller when to come back.

        Must be entered with ``self._lock`` held: the lock is released around
        the ioctl (``self._mnt_lock`` is held instead) and re-acquired before
        the bookkeeping that follows.

        Returns:
            -1 when the disk is not in the cache or the ioctl failed with
            EOPNOTSUPP (the caller should stop calling); otherwise the
            timestamp of the next wakeup.
        """
        cur_ts = time.time()
        last_ts = cur_ts
        di = self._hw_cache.lookup_disk_by_id(disk_id)
        if not di:
            # Disk is gone: forget its statistics as well
            self._fstrim_stat_map.pop(disk_id, None)
            return -1
        # Prefer to continue with the volume that was trimmed last time
        v = self._hw_cache.lookup_volume_by_id(di._fstrim_last_vol)
        if v is not None:
            if not v.need_fstrim(last_ts - self._fstrim_period):
                v = None
            else:
                last_ts = v._fstrim_ts
        # Find new candidate for fstrim
        if v is None:
            # last_ts is lowered to the candidate's _fstrim_ts on every match
            # and passed to the next need_fstrim() call; the last matching
            # volume wins
            for vc in self._hw_cache.get_volumes():
                if not vc._disk_id == disk_id:
                    continue
                if not vc.may_fstrim():
                    continue
                if not vc.need_fstrim(last_ts):
                    continue
                last_ts = vc._fstrim_ts
                v = vc

        # Check if we walk disk's partition first
        part, last_ts = di.next_fstrim(last_ts)
        delay = cur_ts - last_ts
        if (delay < self._fstrim_period):
            # Nothing is due yet: sleep for the rest of the period plus a
            # random 1..60 second offset
            to_wait = self._fstrim_period - delay
            next_wakeup = cur_ts + to_wait + random.randint(1, 60)
            log.debug('fstrim-job: {}, no candidated found, sleep for {} sec'.format(disk_id, int(next_wakeup - cur_ts)))
            return next_wakeup

        # Argument block for the ioctl (FSTrimIoctl is defined elsewhere in this file)
        args = FSTrimIoctl()
        count = args.count = di._fstrim_max_len
        args.min_len = di._fstrim_min_len
        if part is not None:
            vid = part.uuid
            mnt = part.mnt_path
            args.start = di._fstrim_offset
        else:
            # NOTE(review): assumes v is not None whenever part is None at this
            # point -- confirm against next_fstrim()
            vid = di._fstrim_last_vol = v._uuid
            mnt = v._mnt_path
            args.start = v._fstrim_offset

        # Swap locks: hold only _mnt_lock while the ioctl runs
        self._mnt_lock.acquire()
        self._lock.release()
        cur_ts = time.time()
        fd = - 1
        rc = -1
        if args.start == 0:
            log.debug('fstrim-job: {} Start volume: {}, mount at: {}, delay: {}'.format(disk_id, vid, mnt, int(delay)))
        try:
            try:
                fd = os.open(mnt, os.O_DIRECTORY)
                rc = fcntl.ioctl(fd, FITRIM, args)
            except EnvironmentError as e:
                # Keep the errno as the result code instead of raising
                rc = e.errno
        finally:
            if fd != -1:
                os.close(fd)
            # Swap the locks back before touching the cache again
            self._mnt_lock.release()
            self._lock.acquire()
        # Pause proportionally to the time the ioctl took
        wait = 0.01 + ((time.time() - cur_ts) * di._fstrim_wait_factor)
        # TODO: Once we reacquire lock we may update fstrim iterator.
        # This piece looks ugly. Proper locking should change it.
        done = False
        if part is not None:
            done = di.fstrim_iter_advance(count, update_ts=bool(rc))
        else:
            # The volume is looked up again: _lock was released during the ioctl
            v = self._hw_cache.lookup_volume_by_id(vid)
            if v is not None:
                done = v.fstrim_iter_advance(count, update_ts=bool(rc))
        # Update stats
        # EINVAL is possible if we have reach EOFS, ignore it
        if rc == errno.EINVAL and done:
            rc = 0

        fstrim_stat = self._fstrim_stat_map.get(disk_id)
        if fstrim_stat is None:
            fstrim_stat = self._fstrim_stat_map[disk_id] = FSTrimStat(di)

        if rc:
            if rc == errno.EOPNOTSUPP:
                # Signal that caller should stop calling this method
                self._fstrim_stat_map.pop(disk_id, None)
                return -1
            log.error('fstrim-job: %s Failed fstrim id:%s mnt:%s '
                      'start:%d count:%d min_len:%d ret:%d' % (disk_id, vid, mnt,
                                                               args.start,
                                                               args.count, args.min_len, rc))
            fstrim_stat.inc_errors()
        else:
            # args.count as left by the ioctl vs. the requested count
            # (presumably the ioctl writes back the amount trimmed -- confirm)
            fstrim_stat.inc_bytes(args.count, count)
        if done:
            log.debug('fstrim-job: {} Complete with volume: {}, mount at: {}'.format(disk_id, vid, mnt))
            fstrim_stat.inc_loops()

        return time.time() + wait

    def handle_fstrim(self, disk_id, may_block=True):
        """Run one fstrim step for *disk_id* under ``self._lock``.

        Args:
            disk_id: id of the disk to process.
            may_block: passed to ``self._lock.acquire``; when the lock is not
                obtained the step is skipped.

        Returns:
            The value of ``_handle_fstrim``, or "now + 0.01" when the lock was
            not obtained.
        """
        if not self._lock.acquire(may_block):
            return time.time() + 0.01
        try:
            return self._handle_fstrim(disk_id)
        finally:
            self._lock.release()

    # Locking: drops self._lock internally
    @synchronized('_lock')
    def push_stats(self, last_err):
        """Collect volume/disk/fstrim counters and POST them to ``self._stat_url``.

        Locking: expects ``self._lock`` to be held on entry (presumably taken
        by the ``synchronized('_lock')`` wrapper). The lock is dropped only for
        the duration of the HTTP request and is always re-acquired before this
        method returns or lets an exception propagate, so the outer release
        stays balanced.

        :param last_err: result of the previous call; when truthy, failure
            messages are not logged again.
        :return: 0 if the POST returned HTTP 200, 1 if it returned another
            status or raised.
        """
        err = 0

        # Single pass over the volumes: total, summed size, mounted, ready.
        vol = vol_mnt = vol_ready = vol_size = 0
        for v in self._hw_cache.get_volumes():
            vol += 1
            vol_size += v._size
            if v._mnt_path:
                vol_mnt += 1
            if v._s_ready._val:
                vol_ready += 1

        disk = disk_ready = 0
        for d in self._hw_cache.get_disks():
            disk += 1
            if d._s_ready._val:
                disk_ready += 1

        # (signal name, value) pairs for the host-wide record.
        summary = (
            ('nr_volumes_tmmm', vol),
            ('volumes_size_GB_tmmm', (vol_size / 1024 ** 3)),
            ('nr_volumes_mounts_tmmm', vol_mnt),
            ('nr_volumes_ready_tmmm', vol_ready),
            ('nr_disks_tmmm', disk),
            ('nr_disks_ready_tmmm', disk_ready),
            ('remount_tmmm', self._stat_remounts),
            ('remount_errors_tmmm', self._stat_remount_errors),
        )
        yasm_data = [{
            'tags': {
                'itype': consts.DEFAULT_YASM_PDOD_ITYPE,
                'ctype': consts.DEFAULT_YASM_PDOD_CTYPE
            },
            'values': [{'name': name, 'val': val} for name, val in summary]
        }]
        # One extra record per disk that has fstrim statistics.
        for fstrim_stat in self._fstrim_stat_map.values():
            yasm_data.append({
                'tags': {
                    'itype': consts.DEFAULT_YASM_PDOD_ITYPE,
                    'ctype': consts.DEFAULT_YASM_PDOD_CTYPE,
                    'disk': fstrim_stat.get_disk_name()
                },
                'values': fstrim_stat.get_yasm_values()
            })

        # Do not hold the lock across the network call (up to 15 s timeout).
        self._lock.release()
        try:
            try:
                ret = requests.post(self._stat_url, json.dumps(yasm_data), timeout=15)
                if ret.status_code != 200:
                    err = 1
                    if not last_err:
                        # ret.json() may raise on a non-JSON body; that is
                        # handled by the except clause below.
                        log.info('RET code {0} returned -- full json dump {1}'.format(ret.status_code, ret.json()))
            except Exception as ex:
                err = 1
                if not last_err:
                    log.info('Unable to forward to yasm: %s' % str(ex))
        finally:
            # Re-take the lock even if something that is not an Exception
            # (e.g. KeyboardInterrupt) escapes, so the caller-side release
            # never runs on a lock this thread does not hold.
            self._lock.acquire()

        self._stat_ts = time.time()
        return err

    @logrpc
    @synchronized('_lock')
    def daemon_get_stat(self, request, context):
        """RPC handler: return fstrim statistics and remount counters.

        Builds a ``DaemonGetStatResponse``, lets every entry of
        ``self._fstrim_stat_map`` add its data via ``fill_stat_response``,
        then copies ``self._stat_remounts`` and ``self._stat_remount_errors``.
        """
        rid = logger.get_req_id(request, context)
        log.info("[%s] daemon_get_stats" % (rid))

        response = diskman_pb2.DaemonGetStatResponse()
        for per_disk_stat in self._fstrim_stat_map.values():
            per_disk_stat.fill_stat_response(response)

        response.remounts = self._stat_remounts
        response.remount_errors = self._stat_remount_errors
        return response

    @logrpc
    @synchronized('_lock')
    def daemon_update_cache(self, request, context):
        """RPC handler: call ``self._check_cache(True)`` and return a fresh,
        unpopulated ``DaemonUpdateCacheResponse``.

        NOTE(review): the ``True`` argument presumably forces a refresh --
        confirm against ``_check_cache``.
        """
        log.info("[%s] daemon_update_cache" % (logger.get_req_id(request, context)))
        self._check_cache(True)
        return diskman_pb2.DaemonUpdateCacheResponse()

    @logrpc
    @synchronized('_lock')
    def daemon_set_loglevel(self, request, context):
        """RPC handler: apply ``request.verbosity`` to the module logger.

        The request is logged first, then ``logger.set_log_level`` is called
        on ``log``; an unpopulated ``DaemonSetLoglevelResponse`` is returned.
        """
        rid = logger.get_req_id(request, context)
        verbosity = request.verbosity
        log.info("[%s] daemon_set_loglevel: %d", rid, verbosity)
        logger.set_log_level(log, verbosity)
        return diskman_pb2.DaemonSetLoglevelResponse()

    @logrpc
    @synchronized('_lock')
    def server_info(self, request, context):
        """RPC handler: describe the running server.

        The ``ServerInfoResponse`` carries the build revision (as a string),
        ``self._id``, the current process id, ``self._start_ts`` truncated to
        whole seconds, and the ``self._format_new_disk`` setting.
        """
        rid = logger.get_req_id(request, context)
        log.info("[%s] server_get_info" % (rid))

        info = diskman_pb2.ServerInfoResponse()
        info.version = str(sv.svn_revision())
        info.id = self._id
        info.main_pid = os.getpid()
        info.start_time.FromSeconds(int(self._start_ts))
        info.config.autoformat = self._format_new_disk
        return info

    def serve(self):
        """Push statistics every ``self._stat_period`` seconds until interrupted.

        Each round sleeps first, then calls ``push_stats`` with the previous
        round's result. An ``Exception`` ends the loop and is logged, after
        which the method returns normally; the stop message is logged on
        every exit path.
        """
        prev_err = 0
        try:
            while True:
                time.sleep(self._stat_period)
                prev_err = self.push_stats(prev_err)
        except Exception as exc:
            log.info("Got exception :%s " % exc)
        finally:
            log.info("Stop DiskManager service build: %s pid: %d id:%s", sv.svn_revision(), os.getpid(), self._id)

    def stop_bg_threads(self, wait=True):
        """Ask the background jobs and every disk to stop their threads.

        ``self._quota_sync_job`` and ``self._watchdog`` are handled in that
        order; a job that is ``None`` is skipped. Each job is notified and,
        when ``wait`` is true, joined before moving on to the next one.
        Afterwards ``stop_bg_threads(wait=wait)`` is called on every disk
        returned by ``self._hw_cache.get_disks()``.

        :param wait: whether to block until each job has finished.
        """
        for worker in (self._quota_sync_job, self._watchdog):
            if worker is None:
                continue
            worker.notify_stop()
            if wait:
                worker.join()

        for disk in self._hw_cache.get_disks():
            disk.stop_bg_threads(wait=wait)
