from __future__ import unicode_literals
from __future__ import print_function
import time

import collections

import gevent
import gevent.pool
import requests
import logging
import ujson
import lz4.block
from StringIO import StringIO
from gevent.pool import Pool as GeventPool
from infra.swatlib.gevent import geventutil
import cachetools

from infra.deploy_export_stats.src.libs.thrift import client
from infra.deploy_export_stats.src.libs.iss_thrift3 import IssService
from infra.deploy_export_stats.src.libs.iss_thrift3 import ttypes

from infra.deploy_export_stats.src.libs.metrics import ROOT_REGISTRY
from infra.deploy_export_stats.src.reporters.base import BaseReporter
from infra.deploy_export_stats.src.reporters.yp_allocated import YpAllocatedReporter
from collections import defaultdict, namedtuple

# Portion passed to every getConfiguration() call: offset 0 with the largest signed 32-bit limit.
PORTION = ttypes.Portion(offset=0, limit=2147483647)
# Key prefixes tried, in this order, when reading 'container/constraints/<prefix><name>' instance properties.
CONTAINER_NAMES = ['slot.', '', 'iss_hook_start.']
# Per-location ISS clients that are all queried for services with the ISS_MULTI deploy engine.
ISS_MULTI_CLIENTS = ['ISS_MAN', 'ISS_VLA', 'ISS_SAS', 'ISS_MSK']
# Source of the lz4-compressed hosts dump used to build the hosts cache.
GENCFG_HOSTS_DATA_URL = 'https://proxy.sandbox.yandex-team.ru/last/GENCFG_HOSTS_DATA'

# '<service_id>#<conf_id>' -> (stats by cluster, hosts cpu usage) of configurations already fetched from ISS.
SERVICES_CONFIG_CACHE = cachetools.Cache(maxsize=50000)

# service id -> stats by cluster of the configuration most recently accounted for that service.
SERVICES_CURRENT_STATS = cachetools.Cache(maxsize=50000)

# host name -> NodeInfo; cleared and rebuilt by IssAllocatedReporter.build_hosts_cache().
NODES_TOTAL_VCPU_CACHE = cachetools.Cache(maxsize=1000000)  # type: dict[str, NodeInfo]

# CPU model name -> number of vCPUs assigned to a host of that model (see build_hosts_cache()).
# Presumably derived from:
# 'https://a.yandex-team.ru/api/tree/blob/trunk/data/gencfg_db/hardware_data/models.yaml'
CPU_MODELS_NCPU_INFO = {'X5450': 8, 'E3-1265Lv3': 8, 'E5-2683v4': 64, 'E5630': 16, 'Gold5120': 56, 'E5410': 8,
                        'E5-2640v3': 32,
                        'Gold6130': 64, 'E5-2640v4': 40, 'AMD6168': 24, 'E5-2630v4_early_sample': 40, 'E5345': 8,
                        'L5410': 8,
                        'E5540': 16, 'AMD6238': 24, 'Epyc7351': 64, 'E5620': 16, 'UnknownBroadwell': 88,
                        'E5-2683v3': 56,
                        'E3-1280v3': 8, 'unknown': 1, 'X5675': 24, 'Gold6126': 48, 'Platinum8180': 112, 'X5365': 8,
                        'E5-2697v3': 56, 'E5-2650Lv3': 48, 'C2750': 8, 'E3-1230Lv3': 8, 'E5645': 24, 'AMD6176': 24,
                        'E5-2660v4': 56, 'E5-2660v2': 40, 'E5-2660v3': 40, 'UnknownSkylake': 112, 'E5-4607v2': 48,
                        'E5-2695v4': 72, 'E5530': 16, 'E5-2667': 24, 'E5440': 8, 'AMD6172': 24, 'E5-2660': 32,
                        'E5462': 8,
                        'Gold6138': 80, 'E3-1275v3': 8, 'Gold6230': 80, 'E5-2650v3': 40, 'E5-2650v2': 32,
                        'Gold6136': 48,
                        'E5-2650v4': 48, 'E5-2643v3': 24, 'E5-2667v3': 32, 'X5670': 24, 'E5-2667v4': 32,
                        'E5-2652v3': 40,
                        'E5-2640v4_early_sample': 48, 'AMD6274': 32, 'E5-2667v2': 32}

# Segment name used for instances whose host is missing from the hosts cache (or has no segment set).
NOT_FOUND_SEGMENT = 'not-found'

# Module-wide logger.
log = logging.getLogger('iss')


class NodeInfo(object):
    """Mutable per-host record kept in the hosts cache.

    Only ``fqdn`` is known at construction time; every other attribute starts
    with a neutral value and is filled in later by the code that builds the
    hosts cache and accounts instance CPU usage.
    """

    def __init__(self, fqdn):
        self.fqdn = fqdn
        # Placement and capacity attributes are unknown until a data source sets them.
        self.cluster = self.vcpu = self.segment = self.yp_segment = None
        # CPU accounting accumulated while services are processed.
        self.vcpu_used = self.over_commit_cpu = 0.0
        self.instances = 0
        # Bookkeeping about where the host record came from.
        self.source = None
        self.rtc_tag = False
        self.deploy_engine = None


class EngineClient(object):
    """Pairs a client object with the name of the deploy engine it talks to."""

    def __init__(self, deploy_engine, client):
        self.deploy_engine, self.client = deploy_engine, client


class ClusterStats(object):
    """Accumulator of guaranteed resources and instance counts for one (cluster, segment) pair."""

    def __init__(self):
        self.cpu_guarantee = 0
        self.cpu_over_quota = 0.0
        self.memory_guarantee = 0  # bytes
        self.instances = 0
        self.services = 0
        self.iss_requests = 0

    @property
    def memory_gb(self):
        """Memory guarantee converted from bytes to GiB.

        NOTE(review): the file does not import ``division`` from ``__future__``,
        so under Python 2 this is integer division for integer byte counts.
        """
        return self.memory_guarantee / 1024 ** 3

    def __str__(self):
        return ("CPU: {s.cpu_guarantee}c(> {over_quota}); "
                "MEM: {s.memory_gb}Gb; "
                "INST: {s.instances}".format(s=self, over_quota=int(self.cpu_over_quota)))

    def __repr__(self):
        return str(self)

    def join(self, other):
        """Add every counter of ``other`` to this object in place."""
        self.cpu_guarantee += other.cpu_guarantee
        self.cpu_over_quota += other.cpu_over_quota
        self.memory_guarantee += other.memory_guarantee
        self.instances += other.instances
        self.services += other.services
        # Previously omitted, which silently dropped the counter on every merge.
        self.iss_requests += other.iss_requests

    def to_dict(self):
        """Return a compact dict: instances, memory (GiB), whole CPU cores, whole over-quota cores."""
        return {
            'i': self.instances,
            'm': self.memory_gb,
            'c': int(self.cpu_guarantee),
            'o': int(self.cpu_over_quota)
        }


class RequestsStats(object):
    """Counters of configuration requests: how many were made and how many were not found."""

    def __init__(self):
        self.total, self.not_found = 0, 0

    def __str__(self):
        text = "total={s.total}, not_found={s.not_found}".format(s=self)
        return text

    def __repr__(self):
        # The debug representation is the same as the human-readable one.
        return str(self)


class ServiceStats(object):
    """Counters of services seen: all of them and those that are not OFFLINE."""

    def __init__(self):
        self.total_services, self.active_services = 0, 0

    def __str__(self):
        text = "total={s.total_services}, active={s.active_services}".format(s=self)
        return text

    def __repr__(self):
        # The debug representation is the same as the human-readable one.
        return str(self)


class ConfigurationInfo(object):
    """Service id, its candidate configuration ids (in lookup order) and its deploy engine."""

    def __init__(self, service_id, conf_ids, deploy_engine):
        self.service_id, self.conf_ids, self.deploy_engine = service_id, conf_ids, deploy_engine


def _get_deploy_engine(s):
    return s['runtime_attrs']['content']['engines']['engine_type']


def _get_conf_info(s):
    target = s['target_state']['content']
    snap_id = target.get('snapshot_id')
    if not snap_id:
        # Service has no ACTIVE snapshots
        return None
    deploy_engine = _get_deploy_engine(s)
    if deploy_engine == 'YP_LITE':
        # Skip all YP_LITE services
        return None
    conf_ids = []
    # At first we must check target service configuration
    target_conf_id = target['snapshot_meta_info'].get('conf_id')
    if target_conf_id:
        conf_ids.append(target_conf_id)
    # But target configuration may be not present in cacher (not generated yet)
    # Because of it we must check other snapshots
    snap_confs = set()
    for snap in target['snapshots']:
        conf_id = snap['snapshot_meta_info'].get('conf_id')
        if conf_id:
            snap_confs.add(conf_id)
    for snap in s['current_state']['content']['active_snapshots']:
        snap_confs.add(snap['conf_id'])
    # We check snapshots from the newest to the oldest
    for snap in sorted(snap_confs, reverse=True):
        conf_ids.append(snap)
    return ConfigurationInfo(s['_id'], conf_ids, deploy_engine)


def create_stats_by_cluster():
    """Return a fresh mapping: cluster key -> defaultdict(segment -> ClusterStats)."""
    cluster_keys = (
        'ISS_SAS', 'ISS_VLA', 'ISS_MAN', 'ISS_MSK', 'ISS',
        'SAS', 'MAN', 'VLA', 'MSK',
        'total',
    )
    # Every cluster key gets its own defaultdict that creates ClusterStats on first access.
    return {key: defaultdict(lambda: ClusterStats()) for key in cluster_keys}


def join_stats_by_cluster(result, *stats_by_clusters):
    """Merge every given cluster -> segment -> stats mapping into ``result`` and return ``result``.

    ``result[cluster][segment].join(stats)`` is called for each entry, so
    ``result`` must be able to supply a stats object for every key it is asked for.
    """
    for source in stats_by_clusters:
        for cluster_name, by_segment in source.items():
            target_segments = result[cluster_name]
            for segment, segment_stats in by_segment.items():
                target_segments[segment].join(segment_stats)
    return result


def join_hosts_cpu_usage(result, *hosts_cpu_usages):
    """Add every host -> (cpu usage, instances) mapping into ``result`` and return ``result``.

    Values of ``result`` are two-item mutable sequences updated in place.
    """
    for usage_by_host in hosts_cpu_usages:
        for host, usage in geventutil.gevent_idle_iter(usage_by_host.items()):
            cpu_usage, instances = usage
            totals = result[host]
            totals[0] += cpu_usage
            totals[1] += instances
    return result


def diff_stats_by_cluster(left=None, right=None):
    # type: (dict[str, dict[str, ClusterStats]], dict[str, dict[str, ClusterStats]]) -> basestring
    """Describe how ``right`` differs from ``left`` as a one-line string for logging.

    Both arguments map cluster name -> segment -> stats object exposing
    ``instances``, ``cpu_guarantee`` and ``memory_guarantee``; a missing or
    empty argument is treated as "no stats at all".

    Clusters and segments present on either side are compared, so additions
    are reported as well as removals. Missing entries count as zeros and are
    looked up without inserting anything into the arguments. All changed
    segments of a cluster are reported, joined with ','.

    Returns None when nothing differs, otherwise
    ``"TOTAL:<total diff> = <cluster>:<diff>|<cluster>:<diff>..."``.
    """
    left = left or {}
    right = right or {}

    def _values(stats):
        # A segment absent on one side contributes zeros.
        if stats is None:
            return 0, 0, 0
        return stats.instances, stats.cpu_guarantee, stats.memory_guarantee

    result = {}
    for cluster_name in set(left) | set(right):
        left_by_segment = left.get(cluster_name) or {}
        right_by_segment = right.get(cluster_name) or {}
        segments = sorted(set(left_by_segment) | set(right_by_segment), key=lambda seg: '{}'.format(seg))
        changed = []
        for segment in segments:
            # .get() never triggers the defaultdict factory, so the inputs stay untouched.
            left_inst, left_cpu, left_mem = _values(left_by_segment.get(segment))
            right_inst, right_cpu, right_mem = _values(right_by_segment.get(segment))
            instances = right_inst - left_inst
            cpu = int(right_cpu - left_cpu)
            memory = right_mem - left_mem
            if instances or cpu or memory:
                # Floor division keeps the whole-GiB output identical on Python 2 and 3.
                changed.append("segment:{};inst:{};cpu:{};mem:{}".format(segment, instances, cpu,
                                                                         memory // 1024 ** 3))
        if changed:
            result[cluster_name] = ",".join(changed)

    if not result:
        return None
    # 'total' may be unchanged even when individual clusters differ.
    total = result.pop('total', '')
    return "TOTAL:{}".format(total) + ' = ' + "|".join(":".join(item) for item in sorted(result.items()))


def inc_stat(stat, memory_guarantee, cpu_guarantee):
    """Account one instance in ``stat``; falsy (e.g. None) guarantees count as 0."""
    memory_delta = memory_guarantee if memory_guarantee else 0
    cpu_delta = cpu_guarantee if cpu_guarantee else 0
    stat.memory_guarantee += memory_delta
    stat.cpu_guarantee += cpu_delta
    stat.instances += 1


def inc_service_stat(service, stat):
    """Count ``service`` in ``stat``; it is active unless its summary value is 'OFFLINE'.

    A falsy ``service`` is ignored. A service without a summary value is
    treated as 'OFFLINE'.
    """
    if not service:
        return
    stat.total_services += 1
    content = service.get('current_state', {}).get('content', {})
    summary_value = content.get('summary', {}).get('value', 'OFFLINE')
    is_active = summary_value != 'OFFLINE'
    if is_active:
        stat.active_services += 1


def _first_container_constraint(properties, constraint):
    """Return the first non-None 'container/constraints/<prefix><constraint>' property, or None.

    Prefixes are tried in CONTAINER_NAMES order.
    """
    for container in CONTAINER_NAMES:
        value = properties.get('container/constraints/{}{}'.format(container, constraint))
        if value is not None:
            return value
    return None


def _process_cluster_conf(engine_client, full_conf_id, requests_stats):
    """Fetch one configuration from one ISS engine and aggregate its instances.

    :param engine_client: EngineClient whose ``client`` provides getConfiguration()
    :param full_conf_id: configuration id in the '<service_id>#<conf_id>' form
    :param requests_stats: RequestsStats updated in place (total / not_found)
    :return: ``(False, {})`` when the configuration is not found, otherwise
        ``(stats by cluster, host -> [cpu guarantee sum, instances with a cpu guarantee])``
    """
    try:
        requests_stats.total += 1
        resp = engine_client.client.getConfiguration(full_conf_id, PORTION, dumpFormat=None)
    except ttypes.NotFoundException:
        requests_stats.not_found += 1
        return False, {}

    stats = create_stats_by_cluster()
    deploy_engine = engine_client.deploy_engine
    is_global_iss = deploy_engine == 'ISS'
    keys = [deploy_engine, 'total']
    if not is_global_iss:
        # 'ISS_SAS' -> 'SAS': per-location engines also feed the geo key of their location.
        keys.append(deploy_engine[4:])

    hosts_cpu_usage = defaultdict(lambda: [0., 0])

    for i in geventutil.gevent_idle_iter(resp.instances):
        node_id = i.slot.split('@')[1]
        # Instances on unknown hosts (or hosts without a segment) go to the 'not-found' segment.
        node_info = NODES_TOTAL_VCPU_CACHE.get(node_id)
        if node_info is not None and node_info.segment is not None:
            instance_segment = node_info.segment
        else:
            instance_segment = NOT_FOUND_SEGMENT

        cpu_guarantee = _first_container_constraint(i.properties, 'cpu_guarantee')
        memory_guarantee = _first_container_constraint(i.properties, 'memory_guarantee')

        if cpu_guarantee is not None:
            # The value carries a trailing 'c' suffix that is stripped before conversion.
            cpu_guarantee = float(cpu_guarantee.rstrip('c'))
            hosts_cpu_usage[node_id][0] += cpu_guarantee
            hosts_cpu_usage[node_id][1] += 1

        if memory_guarantee is not None:
            # invalid literal for int() with base 10: '4.294967296E10' :((
            memory_guarantee = int(float(memory_guarantee))

        if is_global_iss:
            # The geo key is the three characters following ' a_geo_' in the instance tags.
            # NOTE(review): if the tag is absent find() returns -1 and an unrelated slice is used,
            # which most likely ends in a KeyError below - confirm the tag is always present.
            tags = i.properties['properties/BSCONFIG_ITAGS']
            pos = tags.find(' a_geo_')
            cluster = tags[pos + 7:pos + 10]
            inc_stat(stats[cluster.upper()][instance_segment], memory_guarantee, cpu_guarantee)

        for k in keys:
            inc_stat(stats[k][instance_segment], memory_guarantee, cpu_guarantee)

    return stats, hosts_cpu_usage


def _process_conf(full_conf_id, deploy_engine, iss_clients, requests_stats):
    """Fetch and aggregate one configuration for the given deploy engine.

    For 'ISS_MULTI' every client listed in ISS_MULTI_CLIENTS is queried and the
    per-cluster results are merged; any other engine is served by its own client.

    :return: ``(False, {})`` when no engine knows the configuration, otherwise
        ``(stats by cluster, host -> [cpu usage, instances])``
    """
    if deploy_engine != 'ISS_MULTI':
        return _process_cluster_conf(iss_clients[deploy_engine], full_conf_id, requests_stats)

    per_cluster = [_process_cluster_conf(iss_clients[c], full_conf_id, requests_stats) for c in ISS_MULTI_CLIENTS]
    found_stats = [cluster_stats for cluster_stats, _ in per_cluster if cluster_stats]
    if not found_stats:
        return False, {}
    found_usages = [usage for _, usage in per_cluster if usage]

    # Merge into fresh accumulators: this works even when no cluster reported any host usage
    # (the previous star-call then failed with a TypeError) and leaves per-cluster results intact.
    merged_stats = join_stats_by_cluster(create_stats_by_cluster(), *found_stats)
    merged_usage = join_hosts_cpu_usage(defaultdict(lambda: [0., 0]), *found_usages)
    return merged_stats, merged_usage


def _process_service(s_id, s, iss_clients, stats, service_stats, requests_stats, initial=False,
                     service_instances_hist=None, all_services_stats=None):
    """Account one service in the service counters and in the allocation stats.

    The candidate configurations of the service are tried in order; the first
    one with known stats (cached or fetched from ISS) is merged into ``stats``
    and processing stops. Additions, changes and removals are logged.

    :param s_id: service id
    :param s: service dict; a falsy value is ignored
    :param iss_clients: deploy engine name -> EngineClient
    :param stats: stats by cluster, updated in place
    :param service_stats: mapping of deploy engine / 'total' / 'ISS_TOTAL' -> ServiceStats
    :param requests_stats: RequestsStats updated in place
    :param initial: when true, 'Add Service' messages are not logged
    :param service_instances_hist: optional histogram observing the service instance count
    :param all_services_stats: optional dict filled with per-service totals
    """
    if not s:
        return
    deploy_engine = _get_deploy_engine(s)
    inc_service_stat(s, service_stats[deploy_engine])
    inc_service_stat(s, service_stats['total'])
    if deploy_engine.startswith('ISS'):
        inc_service_stat(s, service_stats['ISS_TOTAL'])
    conf_info = _get_conf_info(s)
    if not conf_info:
        if SERVICES_CURRENT_STATS.get(s_id):
            log.info('Drop Service {}: {}'.format(
                s_id, diff_stats_by_cluster(left=SERVICES_CURRENT_STATS[s_id])))
            del SERVICES_CURRENT_STATS[s_id]
        return

    if conf_info.deploy_engine != 'ISS_MULTI' and conf_info.deploy_engine not in iss_clients:
        # Ignore ISS_ADMIN and ISS_PRESTABLE
        return

    for conf_id in conf_info.conf_ids:
        full_conf_id = '{}#{}'.format(conf_info.service_id, conf_id)

        exists = SERVICES_CONFIG_CACHE.get(full_conf_id)

        if exists:
            service_conf_stats, hosts_cpu_usage = exists
        else:
            service_conf_stats, hosts_cpu_usage = _process_conf(full_conf_id, conf_info.deploy_engine, iss_clients,
                                                                requests_stats)

            if not initial and service_conf_stats and not SERVICES_CURRENT_STATS.get(s_id):
                log.info('Add Service {}: {}'.format(s_id, diff_stats_by_cluster(right=service_conf_stats)))

            if service_conf_stats and SERVICES_CURRENT_STATS.get(s_id):
                diff_stats = diff_stats_by_cluster(
                    left=SERVICES_CURRENT_STATS[s_id],
                    right=service_conf_stats
                )
                if diff_stats:
                    log.info('Change Service {}: {}'.format(s_id, diff_stats))

        if hosts_cpu_usage:
            for host, (cpu_usage, instances) in geventutil.gevent_idle_iter(hosts_cpu_usage.items()):
                node_info = NODES_TOTAL_VCPU_CACHE.get(host)
                if node_info is None:
                    # Usage on hosts missing from the hosts cache is not tracked.
                    continue
                node_info.vcpu_used += cpu_usage
                # Count instances, not CPU: the counter used to be increased by cpu_usage.
                node_info.instances += instances

        if service_conf_stats:
            SERVICES_CONFIG_CACHE[full_conf_id] = service_conf_stats, hosts_cpu_usage
            SERVICES_CURRENT_STATS[s_id] = service_conf_stats
            if service_instances_hist:
                all_instances = [st.instances for st in service_conf_stats['total'].values()]
                service_instances_hist.observe(sum(all_instances) if all_instances else 0)

            join_stats_by_cluster(stats, service_conf_stats)

            if isinstance(all_services_stats, dict):
                # Sum the compact per-segment dicts of the 'total' cluster into one dict per service.
                service_totals = {}
                for st in service_conf_stats['total'].values():
                    for k, v in st.to_dict().items():
                        service_totals[k] = service_totals.get(k, 0) + v
                all_services_stats[s_id] = service_totals
            return

    # None of the candidate configurations is known to ISS.
    log.warning('Skip Service {}:{}'.format(s_id, diff_stats_by_cluster(left=SERVICES_CURRENT_STATS.get(s_id))))


class IssAllocatedReporter(BaseReporter):
    DEFAULT_IDLE_SECONDS = 60 * 60
    NANNY_LIST_SUMMARIES_URL = 'https://nanny.yandex-team.ru/api/repo/ListSummaries/'
    NANNY_SERVICES_URL = 'https://nanny.yandex-team.ru/v2/services/'

    @staticmethod
    def create_iss_clients(config):
        """Build a cluster name -> EngineClient mapping from an iterable of cluster configs.

        Every config item must provide the 'host', 'port' and 'name' keys.
        """
        clients_by_name = {}
        for cluster_conf in config:
            thrift_client = client.ThriftBinClient(
                client_mod=IssService,
                host=cluster_conf['host'],
                port=cluster_conf['port'],
                idle_method=gevent.idle,
            )
            cluster_name = cluster_conf['name']
            clients_by_name[cluster_name] = EngineClient(cluster_name, thrift_client)
        return clients_by_name

    def __init__(self, token, config, yp_reporter, registry=None):
        # type: (str, dict, YpAllocatedReporter, any) -> None
        """Store the collaborators and read the reporter settings from ``config``.

        ``registry`` falls back to ROOT_REGISTRY; every optional config key
        falls back to the default given below.
        """
        self._token = token
        self._registry = registry or ROOT_REGISTRY
        reporter_name = self.__class__.__name__
        self._reporter_registry = self._registry.path('reporters', reporter_name)
        self._iss_clients = self.create_iss_clients(config.get('iss_clients'))

        # Nanny endpoints and paging.
        self._nanny_list_summaries_url = config.get('nanny_list_summaries_url', self.NANNY_LIST_SUMMARIES_URL)
        self._nanny_services_url = config.get('nanny_services_url', self.NANNY_SERVICES_URL)
        self._nanny_request_limit = config.get('nanny_request_limit', 200)

        self._pools_count = config.get('pools_count', 50)
        self._dump_services_stats_to_json = config.get('dump_services_stats_to_json', False)

        # Built-in CPU model table, overridable / extendable from config.
        ncpu_by_model = dict(CPU_MODELS_NCPU_INFO)
        ncpu_by_model.update(config.get('extra_cpu_models_ncpu', {}))
        self._cpu_models_ncpu_info = ncpu_by_model

        self._gencfg_yp_valid_overcommitted_segments = config.get('gencfg_yp_valid_overcommitted_segments', [])
        self._yp_reporter = yp_reporter

    def get_service_count(self, session):
        """Return the 'total' field of the list-summaries response; raises on an HTTP error status."""
        summaries = session.get(self._nanny_list_summaries_url)
        summaries.raise_for_status()
        payload = summaries.json()
        return payload['total']

    def get_stats(self, services_instances_histogram, initial=False, all_services_stats=None):
        """Page through all services and aggregate their ISS allocation stats.

        Services are fetched page by page and each one is processed in its own
        greenlet inside a pool of ``pools_count`` workers.

        :param services_instances_histogram: histogram observing per-service instance counts
        :param initial: passed to _process_service (suppresses 'Add Service' log messages)
        :param all_services_stats: optional dict filled with per-service totals
        :return: tuple of (stats by cluster, deploy engine -> ServiceStats)
        :raises requests.HTTPError: when a request gets an error status
        """
        cluster_stats = create_stats_by_cluster()
        service_stats = collections.defaultdict(ServiceStats)
        requests_stats = RequestsStats()

        session = requests.Session()
        try:
            session.headers['Authorization'] = 'OAuth {}'.format(self._token)

            service_count = self.get_service_count(session)

            req_offset = 0
            pool = GeventPool(size=self._pools_count)

            while req_offset < service_count:

                with self._reporter_registry.get_histogram('get-nanny-services').timer():
                    response = session.get(self._nanny_services_url, params={
                        'skip': req_offset,
                        'limit': self._nanny_request_limit,
                    })

                # No-op for successful responses.
                response.raise_for_status()

                with self._reporter_registry.get_histogram('parsing-nanny-services').timer():
                    services = ujson.loads(response.content)['result']

                for service in services:
                    g = gevent.Greenlet(_process_service, service['_id'], service, self._iss_clients, cluster_stats,
                                        service_stats, requests_stats, initial, services_instances_histogram,
                                        all_services_stats)
                    pool.start(g)

                req_offset += self._nanny_request_limit
                log.info('[{} / {}] Processed requests: ({})'.format(req_offset, service_count, requests_stats))
            pool.join()
        finally:
            # Release the pooled HTTP connections even when a request fails.
            session.close()
        return cluster_stats, service_stats

    def download_gencfg_hosts_data(self, retry_count=3, idle_sleep=10):
        """Download GENCFG_HOSTS_DATA and return it lz4-decompressed.

        Any failure (HTTP, transfer or decompression) is retried after
        ``idle_sleep`` seconds; the failure of the last attempt is re-raised
        with its original traceback.

        :param retry_count: total number of attempts; nothing is done (and None
            is returned) when it is not positive
        :param idle_sleep: seconds to sleep between attempts
        """
        for try_count in range(1, retry_count + 1):
            try:
                r = requests.get(GENCFG_HOSTS_DATA_URL, stream=True)
                try:
                    r.raise_for_status()
                    # filter out keep-alive new chunks
                    payload = b''.join(chunk for chunk in r.iter_content(chunk_size=65536) if chunk)
                finally:
                    # A streamed response keeps its connection until it is closed.
                    r.close()
                return lz4.block.decompress(payload)
            except Exception as e:
                if try_count == retry_count:
                    raise
                log.exception(e)
                log.warning("Can't download GENCFG_HOSTS_DATA: {} / {}, sleep {} seconds...".format(
                    try_count, retry_count, idle_sleep))
                gevent.sleep(idle_sleep)

    def build_hosts_cache(self):
        """Rebuild NODES_TOTAL_VCPU_CACHE from the gencfg hosts dump and from YP.

        1. Every host of the gencfg dump gets its vCPU count (by CPU model), cluster,
           the 'default' segment and the rtc tag flag.
        2. Nodes managed by YP get their cluster and YP segment; nodes unknown to
           gencfg are added with the 'yp' source.
        3. For managed nodes migrated from gencfg, the CPU taken by YP is subtracted
           from the vCPU available to gencfg instances.

        Nodes are registered with ``setdefault()``: ``Cache.get(key, default)`` only
        returns the default without storing it, which left the cache empty.
        """
        NODES_TOTAL_VCPU_CACHE.clear()
        log.info('Start Load ALL_RTC group instances... exists host: {}'.format(len(NODES_TOTAL_VCPU_CACHE)))
        hosts_data = self.download_gencfg_hosts_data(retry_count=3, idle_sleep=20)
        hosts = ujson.loads(hosts_data)
        for host in geventutil.gevent_idle_iter(hosts):
            hostname = "{}{}".format(host['name'], host['domain'])
            node_info = NODES_TOTAL_VCPU_CACHE.setdefault(hostname, NodeInfo(hostname))
            node_info.vcpu = float(self._cpu_models_ncpu_info[host['model']])
            node_info.source = 'gencfg'
            node_info.cluster = host['dc']
            node_info.segment = 'default'  # is not yp segment, just default, need for not_found hosts
            node_info.rtc_tag = 'rtc' in host['walle_tags']

        log.info('Finish load ALL_RTC group instances with {} hosts'.format(len(NODES_TOTAL_VCPU_CACHE)))
        log.info('Start update hosts from yp...')
        found_gencfg_yp_hosts = 0
        yp_managed_hosts = set()
        for cluster, node_id, node in self._yp_reporter.get_managed_nodes():
            yp_managed_hosts.add(node_id)
            node_info = NODES_TOTAL_VCPU_CACHE.setdefault(node_id, NodeInfo(node_id))
            node_info.cluster = cluster
            node_info.yp_segment = node.segment
            if node_info.source is None:
                node_info.source = 'yp'
                node_info.deploy_engine = 'yp'
                # YP may contain very old dismounted hosts which is not
                # present in GENCFG_HOSTS_DATA anymore, we set segment for these hosts
                node_info.segment = 'default'
            if node.migration_source == 'gencfg':
                node_info.deploy_engine = 'gencfg_yp'
                found_gencfg_yp_hosts += 1

        log.info('Finish update hosts from yp with total {} with {} gencfg yp'.format(
            len(NODES_TOTAL_VCPU_CACHE), found_gencfg_yp_hosts))

        log.info('Start update gencfg_yp hosts cpu from yp...')
        total_vcpu_decrement = 0
        total_vcpu_decrement_hosts = 0
        for node_id, resource in self._yp_reporter.get_all_nodes_vcpu():
            if node_id not in yp_managed_hosts:
                # There are CPU resources of not managed nodes in YP.
                # YP does not schedule pods onto such nodes. So we must not
                # decrement CPU available for gencfg instances on such nodes
                # For details see https://st.yandex-team.ru/YP-2059
                continue
            node_info = NODES_TOTAL_VCPU_CACHE.setdefault(node_id, NodeInfo(node_id))

            if not node_info.vcpu:
                # The host is unknown to gencfg: derive its capacity from the YP resource.
                node_info.vcpu = resource.spec.real_vcpu + 2.0

            if node_info.deploy_engine == 'gencfg_yp':
                if node_info.yp_segment in self._gencfg_yp_valid_overcommitted_segments:
                    # Some special YP segments have CPU overcommit by design: https://st.yandex-team.ru/YP-2059
                    # For these segments we try to calculate overcommit as precise as possible:
                    # consider only CPUs which are used by pods, not CPU which is available in YP
                    cpu_allocated_by_yp = resource.used_vcpu / resource.spec.cpu_to_vcpu_factor / 1000.0
                else:
                    cpu_allocated_by_yp = resource.spec.real_vcpu
                node_info.vcpu -= cpu_allocated_by_yp
                total_vcpu_decrement_hosts += 1
                total_vcpu_decrement += cpu_allocated_by_yp

        log.info('Finish update gencfg_yp hosts cpu from yp with {}c in {} hosts'.format(
            total_vcpu_decrement,
            total_vcpu_decrement_hosts
        ))

    def run(self, start_at, initial=False):
        """Collect ISS allocation stats and publish them as gauges.

        Steps: rebuild the hosts cache, aggregate all services, compute per-host
        CPU overcommit, then set the gencfg-yp / no-rtc-tag / per-segment /
        per-cluster / per-engine gauges.

        :param start_at: run start timestamp in seconds; stored in the 'last-run-start'
            gauge and used for the dump file name and the elapsed time log message
        :param initial: passed down to the service processing (suppresses 'Add Service' logs)
        """
        instance_metrics_names = {
            'ISS_SAS': ('sas', 'cacher'),
            'ISS_MAN': ('man', 'cacher'),
            'ISS_VLA': ('vla', 'cacher'),
            'ISS_MSK': ('msk', 'cacher'),
            'ISS': ('global', 'cacher'),

            'SAS': ('sas', 'geo'),
            'MAN': ('man', 'geo'),
            'VLA': ('vla', 'geo'),
            'MSK': ('msk', 'geo'),

            'total': ('global', 'total')
        }
        service_metrics_names = {
            'ISS_SAS': 'iss-sas',
            'ISS_MAN': 'iss-man',
            'ISS_VLA': 'iss-vla',
            'ISS_MSK': 'iss-msk',
            'ISS': 'iss-global',

            'YP_LITE': 'yp-lite',

            'ISS_TOTAL': 'iss-total',
            'ISS_MULTI': 'iss-multi',
            'ISS_ADMIN': 'iss-admin',
            'total': 'total',
        }
        log.info('Start load iss_stats')

        self._reporter_registry.get_gauge('last-run-start').set(start_at)

        all_services_stats = None
        if self._dump_services_stats_to_json:
            all_services_stats = {}

        timer_buckets = [100.0, 200.0, 300.0, 400.0, 600.0, 1000.0, 1500.0, 2000.0, 3000.0]

        with self._reporter_registry.get_histogram('fetch-hosts-data', timer_buckets).timer():
            self.build_hosts_cache()

        with self._reporter_registry.get_histogram('fetch-data', timer_buckets).timer():
            pods_count_buckets = [0, 1, 2, 3, 4, 5,
                                  10, 20, 30, 40, 50, 60, 70,
                                  100, 200, 300, 400, 500, 600, 700, 800, 900, 1000,
                                  1500, 2000, 2500, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000,
                                  11000, 12000, 15000, 20000, 30000, 50000, 100000]
            services_pods_count_hist = self._registry.get_absolute_histogram('iss-nanny-service-pods-count',
                                                                             buckets=pods_count_buckets)
            with services_pods_count_hist as h:
                clusters_stats, services_stats = self.get_stats(h, initial, all_services_stats)

        if self._dump_services_stats_to_json:
            with open('./services_stats_{}.json'.format(start_at), 'w') as fp:
                ujson.dump(all_services_stats, fp)

        log.info('Services Instances Histogram: {}'.format(services_pods_count_hist.fmt()))
        log.info('Fetch iss allocated stats by {}s'.format(int(time.time() - start_at)))

        gencfg_yp_hosts_vcpu = 0
        gencfg_yp_hosts_vcpu_used = 0
        gencfg_yp_hosts_over_commit_cpu = 0

        no_rtc_tag_vcpu = 0
        no_rtc_tag_vcpu_used = 0
        yp_segment_overcommits = defaultdict(float)
        for node_id in geventutil.gevent_idle_iter(NODES_TOTAL_VCPU_CACHE):
            node_info = NODES_TOTAL_VCPU_CACHE.get(node_id, NodeInfo(node_id))
            if node_info.deploy_engine == 'gencfg_yp':
                gencfg_yp_hosts_vcpu += node_info.vcpu if node_info.vcpu is not None else 0
                gencfg_yp_hosts_vcpu_used += node_info.vcpu_used

            if not node_info.rtc_tag:
                no_rtc_tag_vcpu += node_info.vcpu if node_info.vcpu is not None else 0
                no_rtc_tag_vcpu_used += node_info.vcpu_used

            # A host is overcommitted when more CPU is guaranteed on it than it has.
            if node_info.vcpu is not None and node_info.vcpu_used > node_info.vcpu:
                cluster = node_info.cluster.upper()
                node_info.over_commit_cpu = node_info.vcpu_used - node_info.vcpu
                if cluster in ['IVA', 'MYT']:
                    # These locations are reported under the MSK geo key.
                    cluster = 'MSK'
                yp_segment_overcommits[node_info.yp_segment] += node_info.over_commit_cpu
                clusters_stats[cluster][node_info.segment].cpu_over_quota += node_info.over_commit_cpu
                clusters_stats['total'][node_info.segment].cpu_over_quota += node_info.over_commit_cpu

                if node_info.deploy_engine == 'gencfg_yp':
                    gencfg_yp_hosts_over_commit_cpu += node_info.over_commit_cpu

        registry = self._registry.path('watch', 'gencfg-yp')
        registry.get_gauge('vcpu-count').set(gencfg_yp_hosts_vcpu)
        registry.get_gauge('vcpu-used-count').set(gencfg_yp_hosts_vcpu_used)
        registry.get_gauge('vcpu-over-commit-count').set(gencfg_yp_hosts_over_commit_cpu)

        registry = self._registry.path('watch', 'no-rtc-tag')
        registry.get_gauge('vcpu-count').set(no_rtc_tag_vcpu)
        registry.get_gauge('vcpu-used-count').set(no_rtc_tag_vcpu_used)

        # We have signals limit in YASM.
        # https://wiki.yandex-team.ru/golovan/userdocs/stat-handle/#protokol
        # Some random signals will be silently dropped when we exceed this limit (it may be some
        # the most important signals). To avoid this situation we create signals for YP segment
        # overcommits only for 5 first segments with the most significant overcommit.
        # Hosts without a YP segment are dropped BEFORE taking the top 5, so that the
        # empty segment name does not use up one of the five signal slots.
        named_overcommits = [(name, value) for name, value in yp_segment_overcommits.items() if name]
        named_overcommits.sort(key=lambda item: -item[1])
        for k, v in named_overcommits[:5]:
            registry = self._registry.path('watch', 'yp-segments', k)
            registry.get_gauge('cpu-over-commit').set(int(v))

        for cluster_name, cluster_stats_by_segment in clusters_stats.items():
            for segment, cluster_stats in cluster_stats_by_segment.items():
                log.info('ISS Allocated {}: {} \t {}'.format(cluster_name, segment, cluster_stats))
                metrics_path = list(instance_metrics_names[cluster_name]) + [segment if segment is not None else 'none']
                registry = self._registry.path('allocated', 'iss', *metrics_path)
                registry.get_gauge('memory-gb').set(cluster_stats.memory_gb)
                registry.get_gauge('cpu-cores').set(int(cluster_stats.cpu_guarantee))
                registry.get_gauge('cpu-over-commit').set(int(cluster_stats.cpu_over_quota))
                registry.get_gauge('cpu-real-v2').set(int(cluster_stats.cpu_guarantee) - int(cluster_stats.cpu_over_quota))
                registry.get_gauge('pods-count').set(cluster_stats.instances)

        for deploy_engine, service_stats in services_stats.items():
            name = service_metrics_names.get(deploy_engine)
            if not name:
                # Engines without a metric name are not reported.
                continue
            registry = self._registry.path('services', name)
            registry.get_gauge('total').set(service_stats.total_services)
            registry.get_gauge('active').set(service_stats.active_services)
            log.info('ISS Services {} \t {}'.format(deploy_engine, service_stats))

        self._registry.path('reporters', self.__class__.__name__).get_gauge('last-run-done').set(int(time.time()))
