from __future__ import absolute_import

import six
import copy
import enum
import json
import random
import logging
import itertools as it
import collections


# Root Juggler namespace; the per-installation namespaces below are derived from it.
GLOBAL_NAMESPACE = "sandbox"
PRODUCTION_NAMESPACE = "{}.production".format(GLOBAL_NAMESPACE)  # "sandbox.production"
PRE_PRODUCTION_NAMESPACE = "{}.pre_production".format(GLOBAL_NAMESPACE)  # "sandbox.pre_production"
# Juggler endpoints. PUSH_URL is the base URL AggregateCheck uses to post raw events.
API_URL = "http://juggler-api.search.yandex.net/api"
PUSH_URL = "http://juggler-push.search.yandex.net"
# NOTE(review): presumably keys that tell the two installations apart -- no usage is visible here, confirm.
PRODUCTION_KEY = 0
PRE_PRODUCTION_KEY = 1


class StringEnum(str, enum.Enum):
    """Enum whose members are `str` instances and whose `str()` is the raw member value."""

    def __str__(self):
        # Render the plain value rather than the default enum representation.
        raw_value = self.value
        return raw_value


class CheckStatus(StringEnum):
    """Check statuses: sent by AggregateCheck.ok()/warning()/critical() and used in notification rules."""
    OK = "OK"
    WARN = "WARN"
    CRIT = "CRIT"


class Severity(StringEnum):
    """Severity levels of a check; AggregateCheck.to_dict() puts the check's level into its tags."""
    DISASTER = "disaster"
    CRITICAL = "critical"
    MAJOR = "major"
    MINOR = "minor"


# Logins (notification recipients; see OnStatusChangeNotification._get_login_for_methods)
TELEGRAM_CHAT = ["sandbox-dev"]  # used when the notification method list contains "telegram"
SANDBOX_TEAM = ["korum", "yetty", "r-vetrov", "aripinen", "artanis", "ilia-vashurov", "belesev"]
DUTY_LOGIN = "@svc_sandbox:sre-2-0"  # used for "phone" notifications and as the Startrek assignee
DUTY_LOGINS = [DUTY_LOGIN]

# Children hosts (group names passed to aggregators as `children`)
SANDBOX_SERVERS = "search_instrum-sandbox_server"
SANDBOX_MONGO = "search_instrum-sandbox_mongo"
SANDBOX_API = "search_instrum-sandbox_api"
SANDBOX_STORAGES = "search_instrum-sandbox_stg"
SANDBOX_CLIENTS = "search_instrum-sandbox_client"
SANDBOX_PREPROD_CLIENTS = "search_instrum-sandbox_preprod_client"
SANDBOX_LINUX_CLIENTS = "search_instrum-sandbox_linux"
SANDBOX_MONGO_CONFIG_SERVERS = "search_instrum-sandbox_server@tag=sandbox_mongocfg"
REPORTING_SERVERS = SANDBOX_MONGO_CONFIG_SERVERS  # most of the passive checks run on these servers
REPORTING_SERVERS_PREPROD = "search_instrum-sandbox_preprod_server"
SANDBOX_ZOOKEEPER = "search_instrum-sandbox_zk"
SANDBOX_PROXY = "proxy.sandbox.yandex-team.ru"

# Other constants
# Datacenters used to build per-DC children for mongod ports 37003 and 37004 (see mongod_checks_generator).
EXTRA_SHARDED_MONGO_DCS = ["sas", "vla"]


class DateTime(object):
    """
    Time of day given as hours and minutes.

    `str()` renders it as "H:MM" (minutes zero-padded to two digits), `int()` yields the hour only.
    """

    def __init__(self, hh, mm=0):
        self.hh, self.mm = hh, mm

    def __str__(self):
        template = "{}:{:02d}"
        return template.format(self.hh, self.mm)

    def __int__(self):
        # Minutes are deliberately dropped: only the hour is exposed as an integer.
        return self.hh


# Boundaries of the "day" window; used as str() in notification time ranges and as int() in aggregator limits.
DAY_START = DateTime(10)
DAY_END = DateTime(21)

# Weekday numbers used in `day_start` / `day_end` of limits and notifications.
MONDAY = 1
FRIDAY = 5
SUNDAY = 7


class Namespace(collections.namedtuple("Namespace", ("name", "checks"))):
    """A named bundle of checks: `name` is the namespace, `checks` an iterable of objects with `to_dict(name)`."""

    def get_checks(self):
        """Return a lazy generator with every check rendered for this namespace."""
        namespace_name = self.name
        return (item.to_dict(namespace_name) for item in self.checks)


class Check(object):
    """
    Binds a check class to its notifications, aggregator, `force_ok` dependencies and check options,
    and renders the complete check definition as a dictionary.

    :param check: check class exposing `to_dict(namespace)`, `host` and `service`
    :param notifications: optional iterable of objects with `to_dict()`
    :param aggregator: optional object with `to_dict(check)`
    :param force_ok: optional iterable of check classes whose state forces this check to OK when unreachable
    :param check_options: optional mapping that replaces the `check_options` rendered by the check class
    """

    def __init__(self, check, notifications=None, aggregator=None, force_ok=None, check_options=None):
        self.check = check
        self.notifications = notifications
        self.aggregator = aggregator
        self.force_ok = force_ok
        self.check_options = check_options

    def to_dict(self, namespace):
        """
        Render the check definition for `namespace`.

        :param namespace: namespace name the check is rendered for
        :return: dictionary with the check's own fields plus notification / aggregator settings
        """
        result = {"namespace": namespace}
        result.update(self.check.to_dict(namespace))
        if self.notifications:
            result.update(notifications=[n.to_dict() for n in self.notifications])
        if self.aggregator:
            result.update(self.aggregator.to_dict(self.check))
        if self.force_ok:
            # Work on a copy: the dictionary handed out by the aggregator may be the aggregator's own
            # state, and updating it in place would leak `unreach_*` keys into every other rendering.
            aggregator_kwargs = dict(result.get("aggregator_kwargs") or {})
            aggregator_kwargs.update(
                unreach_mode="force_ok",
                unreach_service=[
                    dict(check=self._get_target_check_name(check, namespace))
                    for check in self.force_ok
                ]
            )
            result["aggregator_kwargs"] = aggregator_kwargs
        if self.check_options:
            result.update(check_options=self.check_options)
        return result

    def _get_target_check_name(self, check, namespace):
        """Return ":service" for a check on the same host as ours, "namespace.host:service" otherwise."""
        if check.host == self.check.host:
            return ":{}".format(check.service)
        return "{}.{}:{}".format(namespace, check.host, check.service)


# noinspection PyPep8Naming
class AggregateCheckMeta(type):
    """
    Metaclass that snapshots the declarative fields of every check class.

    On class creation, a deep copy of each attribute listed in `__fields__` is stored in the new
    class's `__field_values__`, so later rendering never shares mutable defaults between classes.
    """
    __fields__ = ["host", "service", "refresh_time", "ttl", "tags", "description", "flaps", "check_options"]

    def __new__(mcs, name, bases, namespace):
        # Name the metaclass explicitly: `super(mcs, mcs)` would resolve back to this very method
        # (infinite recursion) as soon as a subclass of this metaclass is used.
        cls = super(AggregateCheckMeta, mcs).__new__(mcs, name, bases, namespace)
        cls.__field_values__ = {
            f: copy.deepcopy(getattr(cls, f))
            for f in mcs.__fields__
        }
        return cls


# Aggregate checks
@six.add_metaclass(AggregateCheckMeta)
class AggregateCheck(object):
    """
    Declarative base class of every check: subclasses only override the class attributes below.

    Docs:
      - https://docs.yandex-team.ru/juggler/pipeline
      - https://docs.yandex-team.ru/juggler/aggregates/flaps
    """
    host = None
    service = None
    refresh_time = 90
    ttl = 900
    tags = []
    description = None
    flaps = None  # None = no flap suppressing, {} = defaults (stable_time=600, critical_time=stable_time * 2)
    severity = Severity.MINOR
    check_options = {}

    @classmethod
    def to_dict(cls, namespace):
        """Render the snapshotted fields, prefixing `host` with `namespace` and prepending the common tags."""
        # noinspection PyUnresolvedReferences
        payload = dict(cls.__field_values__)
        payload["host"] = ".".join((namespace, cls.host))
        payload["tags"] = [GLOBAL_NAMESPACE, cls.severity] + cls.tags
        return payload

    @classmethod
    def __event(cls, description, status):
        """Push a raw event for this check; returns None without sending on installations outside NONLOCAL."""
        import sandbox.common.types.misc as ctm
        from sandbox import common
        push_client = common.rest.Client(base_url=PUSH_URL)
        installation = common.config.Registry().common.installation
        if installation not in ctm.Installation.Group.NONLOCAL:
            return
        installation = installation.lower()
        # noinspection PyUnresolvedReferences
        payload = {
            "description": description,
            "host": ".".join((GLOBAL_NAMESPACE, installation, cls.host)),
            "service": cls.service,
            "status": str(status),
            "tags": [GLOBAL_NAMESPACE, installation],
        }
        return push_client.events(events=[payload])

    @classmethod
    def ok(cls, description):
        """Report OK status with the given description."""
        return cls.__event(description, CheckStatus.OK)

    @classmethod
    def warning(cls, description):
        """Report WARN status with the given description."""
        return cls.__event(description, CheckStatus.WARN)

    @classmethod
    def critical(cls, description):
        """Report CRIT status with the given description."""
        return cls.__event(description, CheckStatus.CRIT)


class ActiveCheck(AggregateCheck):
    """
    Check whose rendered definition additionally carries `active` and `active_kwargs`.

    Docs: https://docs.yandex-team.ru/juggler/aggregates/actives
    """
    active = None
    active_kwargs = None

    @classmethod
    def to_dict(cls, namespace):
        """Render the base fields and add the active check name with its arguments."""
        payload = dict(super(ActiveCheck, cls).to_dict(namespace))
        payload.update(active=cls.active, active_kwargs=cls.active_kwargs)
        return payload


class PassiveCheck(AggregateCheck):
    """
    Base for checks fed by a service running on every physical host of a group.
    The services shipped in the Sandbox package are listed in sandbox/juggler/MANIFEST.json -- do take a look.
    """
    # Name of the host-side service; Aggregator.to_dict() uses it as the default `service` of children.
    # Example: the check "sandbox.production.host_health" relies on the "sandbox_health_check" service
    # on a host, implemented by "sandbox_health_check_general.py" -- so "sandbox_health_check" goes here.
    local_service = None


class ReportingServersReachability(ActiveCheck):
    """Active `icmpping` check "mongocfg:UNREACHABLE"; other checks reference it through `force_ok`."""
    host = "mongocfg"
    service = "UNREACHABLE"
    active = "icmpping"
    severity = Severity.CRITICAL
    description = "Availability of Sandbox servers where passive Juggler checks work"


class CheckSemaphoresConsistency(PassiveCheck):
    """Passive check "check_semaphores:consistency"."""
    host = "check_semaphores"
    service = "consistency"
    tags = ["semaphore"]
    severity = Severity.MINOR
    description = "Checks if semaphores' values in consistent with values calculated from tasks"


class CheckRegularAcceptance(PassiveCheck):
    """Passive check "regular_acceptance:broken" with a long TTL (14400)."""
    host = "regular_acceptance"
    service = "broken"
    ttl = 14400
    tags = ["acceptance"]
    severity = Severity.MINOR
    description = "Regular acceptance runs: https://sandbox-prestable.yandex-team.ru/scheduler/1000139/view"


class EmergencyChannel(PassiveCheck):
    """
    Signals an unreplied message in the sandbox-emergency@ Telegram group
    (the point of entry is `services.modules.TelegramBot`).

    Rules (every status is set by the respective service process):

    - **WARNING** after ``server.services.telegram_bot.emergency.warning_timeout``
    - **CRITICAL** after ``server.services.telegram_bot.emergency.critical_timeout``
    - **OK** once a reply is received
    """
    host = "emergency_channel"
    service = "unreplied_message"
    refresh_time = 120
    ttl = 600
    tags = ["telegram"]
    severity = Severity.CRITICAL
    description = "Notify about unreplied message in sandbox-emergency@ Telegram group"


class TasksAutodeploy(PassiveCheck):
    """
    Compares the revision of tasks code on servers with that of sandbox/projects in Arcadia and warns
    when it is too old. "Too old" is governed by the `breakage_timeout` setting of the TelegramBot
    service process.
    """
    host = "tasks_autodeploy"
    service = "broken"
    refresh_time = 60
    ttl = 600
    tags = ["tasks_autodeploy"]
    severity = Severity.MAJOR
    description = "Warn if tasks code on servers is too old, compared to Arcadia"


class SandboxSLO(PassiveCheck):
    """
    Watches Sandbox service level objectives and reports when they are broken
    (that is, when an API method's execution time percentile over a small window exceeds a predefined value).
    """
    host = "sandbox_slo"
    service = "broken"
    local_service = "sandbox_slo"
    refresh_time = 60
    ttl = 600
    tags = ["kpi", "sla"]
    severity = Severity.MAJOR
    description = (
        "Warn that certain API methods execute for longer that they should "
        "according to https://wiki.yandex-team.ru/sandbox/sla/#api-methods-sla"
    )


class TaskSessionConsistency(PassiveCheck):
    """
    Signalize about tasks in EXECUTE status without session and about tasks with expired session
    """
    # The docstring above is runtime data: its stripped text becomes the check description.
    description = __doc__.strip()
    host = "task_session_consistency"
    service = "session_consistency"
    refresh_time = 240
    ttl = 600
    tags = ["task_status_checker"]
    severity = Severity.MAJOR


class TasksTransientStatuses(PassiveCheck):
    """
    Signalize about tasks stalled in transient status: STOPPING, FINISHING, ASSIGNED
    """
    # The docstring above is runtime data: its stripped text becomes the check description.
    description = __doc__.strip()
    host = "tasks_transient_statuses"
    service = "transient_statuses"
    refresh_time = 240
    ttl = 600
    tags = ["task_status_checker"]
    severity = Severity.MAJOR


class XMLRPCExceptionCount(PassiveCheck):
    """
    Counts exceptions in tasks code raised during XMLRPC communication (sent from a client on task finish).
    Respective graph: https://grafana.yandex-team.ru/d/dYMWfapik/sandbox-exception-statistics ("XMLRPC errors" panel)
    """
    host = "xmlrpc"
    service = "exception_count"
    local_service = "sandbox_xmlrpc_exceptions"
    ttl = 600
    tags = ["xmlrpc"]
    severity = Severity.MAJOR
    description = "SANDBOX-5872"


class AggregateProxyCheck(PassiveCheck):
    """
    Common base of the proxy checks in Solomon: fixes the "proxy" host and tag for its subclasses.
    See more on https://solomon.yandex-team.ru/admin/projects/sandbox/alerts
    """
    tags = ["proxy"]
    host = "proxy"


class CheckProxyHttpErrors(AggregateProxyCheck):
    """Proxy check "http_errors" (host and tags come from AggregateProxyCheck)."""
    service = "http_errors"
    severity = Severity.DISASTER
    description = "Check for proxy HTTP 500 and 503 responses"


class CheckProxyHttpOk(AggregateProxyCheck):
    """Proxy check "http_ok" (host and tags come from AggregateProxyCheck)."""
    service = "http_ok"
    severity = Severity.CRITICAL
    description = "Check for proxy HTTP ok responses"


class CheckProxyWorkerCount(AggregateProxyCheck):
    """Proxy check "workers_busy" (host and tags come from AggregateProxyCheck)."""
    service = "workers_busy"
    severity = Severity.MINOR
    description = "Check for proxy worker count"


class CheckProxyCertificateOk(ActiveCheck):
    """Active `https` check "proxy:https" requesting "/http_check"; 200, 301 and 302 count as OK."""
    host = "proxy"
    service = "https"
    active = "https"
    active_kwargs = {
        "ok_codes": [200, 301, 302],
        "path": "/http_check",
    }
    flaps = {"stable_time": 120, "critical_time": 200}
    tags = ["proxy"]
    severity = Severity.MINOR


class CheckMetricsWakeupDuration(PassiveCheck):
    """
    Production metrics check coming from Solomon:
    https://solomon.yandex-team.ru/admin/projects/sandbox/alerts/task_wakeup_duration_multislot
    """
    host = "metrics"
    service = "task_wakeup_duration_multislot"
    tags = ["metrics"]
    severity = Severity.MAJOR
    description = "Check for task wakeup time from WAIT_TASK status"


class CheckApiRequestsInProgress(PassiveCheck):
    """
    Production metrics check coming from Solomon:
    https://solomon.yandex-team.ru/admin/projects/sandbox/alerts/sandbox_serviceapi_requests_in_progress
    """
    host = "api"
    service = "requests_in_progress"
    tags = ["api"]
    severity = Severity.MAJOR
    description = "Check for API requests in progress"


class CheckApiRequestsRejected(PassiveCheck):
    """
    Production metrics check coming from Solomon:
    https://solomon.yandex-team.ru/admin/projects/sandbox/alerts/sandbox_serviceapi_requests_rejected
    """
    host = "api"
    service = "requests_rejected"
    tags = ["api"]
    severity = Severity.DISASTER
    description = "Check for rejected API requests comparing to processed"


class CheckMongodbOpsDuration(PassiveCheck):
    """Passive check "mongodb:ops_duration"."""
    host = "mongodb"
    service = "ops_duration"
    tags = ["mongodb"]
    severity = Severity.MINOR
    description = "Check for Mongodb operations duration"


class CheckMongodbActiveWriters(PassiveCheck):
    """Passive check "mongodb:active_writers"."""
    host = "mongodb"
    service = "active_writers"
    tags = ["mongodb"]
    severity = Severity.MINOR
    description = "Check for Mongodb number of active writers"


class FastboneSandbox(PassiveCheck):
    """Passive check "fastbone:unavailable" fed by the host-side "fastbone_sandbox" service."""
    host = "fastbone"
    service = "unavailable"
    local_service = "fastbone_sandbox"
    tags = ["fastbone", "network", "clients"]
    severity = Severity.CRITICAL
    description = (
        "Fastbone network interface availability. "
        "On more information, see: https://wiki.yandex-team.ru/sandbox/dev/support/#check-fastbone"
    )


# Client tag names; every PURPOSE x DENSITY pair gets its own generated dead-clients check (see the loop below).
PURPOSE_TAGS = [
    "GENERIC",
    "SERVER",
    "STORAGE",
    "BROWSER",
    "YABS",
    "PORTOD",
]

DENSITY_TAGS = [
    "MULTISLOT",
    "CORES24",
]


# Name prefix of the generated dead-clients check classes, and the registries (class name -> class)
# that hold them for the production and pre-production environments.
DEAD_CLIENTS_PREFIX = "CountDeadClients"
dead_clients = {}
dead_clients_preprod = {}


def _create_check_clients_dead(_class_name, _class_suffix, _tags, environment):
    """
    Build a PassiveCheck subclass that counts dead clients for one tag combination.

    :param _class_name: name of the class to create
    :param _class_suffix: suffix appended to the "clients" host name
    :param _tags: iterable of tag strings, joined with "&" into the first check argument
    :param environment: environment name passed as the second check argument
    :return: the newly created check class
    """
    attributes = dict(
        host="clients{}".format(_class_suffix),
        service="dead",
        ttl=600,
        tags=["clients"],
        description=(
            "Count clients that are dead from Sandbox's point of view, and ring the alarm "
            "if there's too many of them. For thresholds, see sandbox/juggler/sandbox_dead_client.py"
        ),
        local_service="sandbox_dead_clients",
        severity=Severity.MAJOR,
        check_options=dict(args=["&".join(_tags), environment]),
    )
    return type(_class_name, (PassiveCheck,), attributes)


# Register one dead-clients check class per tag combination, for both environments:
# every PURPOSE x DENSITY pair, a few standalone combinations, and the empty one (no tag filter).
for tags in it.chain(
    it.product(PURPOSE_TAGS, DENSITY_TAGS),
    [("WINDOWS", ), ("OSX", "~MAINTENANCE"), ("MOBILE_MONOREPO", ), ()]
):
    # "~TAG" is spelled "NOT_TAG" inside class and host names.
    class_suffix = "__".join(tag.replace("~", "NOT_") for tag in tags)
    class_suffix = ("_" + class_suffix) if class_suffix else class_suffix

    class_name = "{}{}".format(DEAD_CLIENTS_PREFIX, class_suffix)
    for registry, environment in ((dead_clients, "PRODUCTION"), (dead_clients_preprod, "PRE_PRODUCTION")):
        registry[class_name] = _create_check_clients_dead(class_name, class_suffix, tags, environment)


def dead_clients_checks_generator():
    """
    Yield a production `Check` for every generated dead-clients class: aggregated with "logic_and" over
    REPORTING_SERVERS, forced OK via ReportingServersReachability, with call escalation and email on CRIT.
    """
    for check_class in dead_clients.values():
        aggregator = Aggregator(aggregator="logic_and", children=REPORTING_SERVERS)
        notifications = [
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email,
        ]
        yield Check(
            check=check_class,
            notifications=notifications,
            aggregator=aggregator,
            force_ok=[ReportingServersReachability]
        )


def dead_clients_checks_generator_preprod():
    """
    Yield a pre-production `Check` for every generated dead-clients class: aggregated with "logic_and"
    over REPORTING_SERVERS_PREPROD, forced OK via ReportingServersReachability, without notifications.
    """
    for check_class in dead_clients_preprod.values():
        aggregator = Aggregator(aggregator="logic_and", children=REPORTING_SERVERS_PREPROD)
        yield Check(
            check=check_class,
            aggregator=aggregator,
            force_ok=[ReportingServersReachability]
        )


class AssignedTasksAge(PassiveCheck):
    """Passive check "assigned_tasks:age" fed by the host-side "sandbox_assigned_task_age" service."""
    host = "assigned_tasks"
    service = "age"
    local_service = "sandbox_assigned_task_age"
    tags = ["assigned_tasks", "kpi"]
    severity = Severity.MINOR
    description = (
        "Check for tasks that are stuck in ASSIGNED state for too long. "
        "For what too long is, see sandbox/juggler/sandbox_assigned_task_age.py"
    )


class EnqueueingTasksAge(PassiveCheck):
    """Passive check "enqueueing_tasks:age" fed by the host-side "sandbox_enqueuing_task_age" service."""
    host = "enqueueing_tasks"
    service = "age"
    # NOTE(review): spelled "enqueuing" here but "enqueueing" in the script name quoted below -- confirm.
    local_service = "sandbox_enqueuing_task_age"
    tags = ["enqueueing_tasks", "kpi"]
    severity = Severity.MINOR
    description = (
        "Check for tasks that are stuck in ENQUEUING state for too long. "
        "For what too long is, see sandbox/juggler/sandbox_enqueueing_task_age.py"
    )


class ExecutingTasksYabsYtstat(PassiveCheck):
    """Passive check "executing_tasks:yabs_ytstat" fed by "sandbox_executing_tasks_yabs_ytstat"."""
    host = "executing_tasks"
    service = "yabs_ytstat"
    local_service = "sandbox_executing_tasks_yabs_ytstat"
    tags = ["executing_tasks", "kpi", "yabs"]
    severity = Severity.MINOR
    description = "SANDBOX-5252"


class MongoCfgAvailability(PassiveCheck):
    """Passive check "mongocfg:down" fed by the host-side "sandbox_mongocfg" service."""
    host = "mongocfg"
    service = "down"
    local_service = "sandbox_mongocfg"
    tags = ["mongocfg", "mongo"]
    severity = Severity.DISASTER
    description = "Ensure that the whole mongocfg cluster is available (hosts: k@sandbox_mongocfg)"


class ServiceProcessesDelay(PassiveCheck):
    """Passive check "services:down" fed by the host-side "sandbox_service_threads_check" service."""
    host = "services"
    service = "down"
    local_service = "sandbox_service_threads_check"
    tags = ["kpi"]
    severity = Severity.CRITICAL
    description = (
        "Check that all service processes finish their execution loops in time, which is different "
        "for every service (\"notificaton_timeout\" property)."
    )


class ServiceQActivity(PassiveCheck):
    """Passive check "serviceq:down" fed by the host-side "sandbox_serviceq_activity" service."""
    host = "serviceq"
    service = "down"
    local_service = "sandbox_serviceq_activity"
    tags = ["kpi"]
    severity = Severity.DISASTER
    description = "Check that Sandbox queue dispatcher keeps assigning tasks to clients"


class ServiceQCPU(PassiveCheck):
    """Passive check "serviceq:cpu" fed by the host-side "sandbox_serviceq_cpu" service."""
    host = "serviceq"
    service = "cpu"
    local_service = "sandbox_serviceq_cpu"
    ttl = 300
    tags = ["kpi"]
    severity = Severity.CRITICAL
    description = "Check that Sandbox queue dispatcher do not consume too much CPU"


class MdsCleanerDelay(PassiveCheck):
    """Passive check "mds_cleaner_delay:mds_cleaner" fed by the "sandbox_mds_cleaner_delay" service."""
    host = "mds_cleaner_delay"
    service = "mds_cleaner"
    local_service = "sandbox_mds_cleaner_delay"
    ttl = 300
    tags = ["kpi"]
    severity = Severity.MINOR
    description = "Check that Sandbox mds_cleaner process is not delayed"


class QueueAutocheckPrecommits(PassiveCheck):
    """Passive check "queue_autocheck_precommits:down" with explicit flap suppression settings."""
    host = "queue_autocheck_precommits"
    service = "down"
    local_service = "sandbox_queue_autocheck_precommits"
    ttl = 600
    flaps = {"stable_time": 180, "critical_time": 600}
    tags = ["kpi"]
    severity = Severity.CRITICAL
    description = "Checks queue of precommit checks"


class QueueAutocheckPostcommits(PassiveCheck):
    """Passive check "queue_autocheck_postcommits:down" with explicit flap suppression settings."""
    host = "queue_autocheck_postcommits"
    service = "down"
    local_service = "sandbox_queue_autocheck_postcommits"
    ttl = 600
    flaps = {"stable_time": 180, "critical_time": 600}
    tags = ["kpi"]
    severity = Severity.CRITICAL
    description = "Checks queue of postcommit checks"


class RejectedTasks(PassiveCheck):
    """Passive check "rejected_tasks:limit_exceeded" fed by the "sandbox_rejected_tasks" service."""
    host = "rejected_tasks"
    service = "limit_exceeded"
    local_service = "sandbox_rejected_tasks"
    ttl = 600
    tags = ["clients", "assigned_tasks"]
    severity = Severity.MAJOR
    description = "Checks the amount or rejected tasks and clients, that have rejected them"


class Coredumps(PassiveCheck):
    """Passive check "coredumps:coredumps_limit_exceeded" fed by the host-side "coredumps" service."""
    host = "coredumps"
    service = "coredumps_limit_exceeded"
    local_service = "coredumps"
    ttl = 600
    severity = Severity.MAJOR
    description = "Checks number of coredumps on servers"


class StepUnprocessedEvents(PassiveCheck):
    """Passive check "step:down" fed by the host-side "step_unprocessed_events" service."""
    host = "step"
    service = "down"
    local_service = "step_unprocessed_events"
    tags = ["step"]
    severity = Severity.MAJOR
    description = "Checks number of unprocessed STEP events"


class ZookeeperNodes(PassiveCheck):
    """Passive check "health:down" fed by the host-side "sandbox_zookeeper" service."""
    host = "health"
    service = "down"
    local_service = "sandbox_zookeeper"
    ttl = 300
    flaps = {"stable_time": 180, "critical_time": 600}
    tags = ["zookeeper"]
    severity = Severity.CRITICAL
    description = "Checks health of zookeeper nodes"


# Server checks
class ServerCPULoad(PassiveCheck):
    """
    Average CPU usage on servers.
    See more: https://solomon.yandex-team.ru/admin/projects/sandbox/alerts/sandbox_server_cpu
    """
    host = "server"
    service = "cpu_load"
    local_service = "cpu_load"
    severity = Severity.MAJOR
    description = "Check average CPU usage on servers"


class ServerHWErrs(PassiveCheck):
    """
    Passive check "server:hw_errs".
    Provided by juggler/bundles/ps_search2/modules-available/hw_errs.sh
    """
    host = "server"
    service = "hw_errs"
    local_service = "hw_errs"
    severity = Severity.MAJOR
    description = "Check kernel reported errors on servers"


class ServerHTTP(ActiveCheck):
    """Active `http` check "server:http": port 8080, path "http_check", only 200 is OK, 5 second timeout."""
    host = "server"
    service = "http"
    active = "http"
    # NOTE(review): the path has no leading slash, unlike "/http_check" in CheckProxyCertificateOk -- confirm.
    active_kwargs = {
        "ok_codes": [200],
        "path": "http_check",
        "port": 8080,
        "timeout": 5,
    }
    flaps = {
        "stable_time": 300,
        "critical_time": 600,
        "boost_time": 0,
    }
    severity = Severity.MAJOR


class ServerRAID(PassiveCheck):
    """
    Passive check "server:raid".
    Provided by juggler/bundles/ps_search2/modules-available/raid.sh
    """
    host = "server"
    service = "raid"
    local_service = "raid"
    severity = Severity.MAJOR
    description = "Check for software RAID"


class ServerNTPStratum(PassiveCheck):
    """
    Passive check "server:ntp_stratum".
    Provided by juggler/bundles/ps_search2/modules-available/ntp_stratum.sh
    """
    host = "server"
    service = "ntp_stratum"
    local_service = "ntp_stratum"
    severity = Severity.MAJOR
    description = "Check ntp server synchronization"


class ServerFileserver(ActiveCheck):
    """Active `http` check "server:fileserver": port 13578, path "/", only 200 is OK, 4 second timeout."""
    host = "server"
    service = "fileserver"
    active = "http"
    active_kwargs = {
        "ok_codes": [200],
        "path": "/",
        "port": 13578,
        "timeout": 4,
    }
    severity = Severity.MAJOR


class ServerZookeperMntr(PassiveCheck):
    """
    Passive check "server:zookeeper_mntr".
    Provided by juggler/bundles/ps_search2/modules-available/zookeeper_mntr.sh
    """
    host = "server"
    service = "zookeeper_mntr"
    local_service = "zookeeper_mntr"
    severity = Severity.CRITICAL
    description = "Check zookeeper server state is available"


class ServerZookeeper(PassiveCheck):
    """
    Passive check "server:zookeeper".
    Provided by juggler/bundles/ps_search2/modules-available/zookeeper.sh
    """
    host = "server"
    service = "zookeeper"
    local_service = "zookeeper"
    severity = Severity.CRITICAL
    description = "Check zookeeper server is up"


class ServerSSH(ActiveCheck):
    """Active `ssh` check "server:ssh" with a 20 second timeout."""
    host = "server"
    service = "ssh"
    active = "ssh"
    active_kwargs = {"timeout": 20}
    severity = Severity.MAJOR


class ServerUnreachable(ActiveCheck):
    """Active `icmpping` check "server:UNREACHABLE"; the mongod checks reference it through `force_ok`."""
    host = "server"
    service = "UNREACHABLE"
    active = "icmpping"
    severity = Severity.CRITICAL


class ServerUnreachableDisaster(ActiveCheck):
    """Active `icmpping` check "server:UNREACHABLE_DISASTER" (disaster severity)."""
    host = "server"
    service = "UNREACHABLE_DISASTER"
    active = "icmpping"
    severity = Severity.DISASTER


class ServerCheckSkynet(PassiveCheck):
    """
    Passive check "server:check_skynet".
    Provided by juggler/bundles/ps_search2/modules-available/check_skynet.pl
    """
    host = "server"
    service = "check_skynet"
    local_service = "check_skynet"
    severity = Severity.MAJOR
    description = "Check SkyNet services"


class ServerUnispace(PassiveCheck):
    """
    Passive check "server:unispace".
    Provided by juggler/bundles/ps_search2/modules-available/unispace.pl
    """
    host = "server"
    service = "unispace"
    local_service = "unispace"
    severity = Severity.MAJOR
    description = "Checks all available space on all local filesystems and alert when the usage exceeds the limit"


class ServerErrataUpdates(PassiveCheck):
    """
    Passive check "server:errata_updates" with a long TTL (14400).
    Provided by juggler/bundles/ps_search2/modules-available/errata_updates.py
    """
    host = "server"
    service = "errata_updates"
    local_service = "errata_updates"
    ttl = 14400
    tags = ["security-updates"]
    severity = Severity.MINOR
    description = "Check if there are security updates available"


# Server step checks
class ServerStepClockEvent(PassiveCheck):
    """
    Passive check "server_step:step_clock_event".
    Provided by juggler/bundles/ps_search2/modules-available/step_clock_event
    """
    host = "server_step"
    service = "step_clock_event"
    local_service = "step_clock_event"
    severity = Severity.MAJOR


# Storage checks
class StorageRAID(PassiveCheck):
    """
    Passive check "storage:raid".
    Provided by juggler/bundles/ps_search2/modules-available/raid.sh
    """
    host = "storage"
    service = "raid"
    local_service = "raid"
    severity = Severity.MINOR
    description = "Check for software RAID"


class StorageFileserver(ActiveCheck):
    """Active `http` check "storage:fileserver": port 13578, path "/", only 200 is OK, 4 second timeout."""
    host = "storage"
    service = "fileserver"
    active = "http"
    active_kwargs = {
        "ok_codes": [200],
        "path": "/",
        "port": 13578,
        "timeout": 4,
    }
    severity = Severity.CRITICAL


class StorageHWErrors(PassiveCheck):
    """
    Passive check "storage:hw_errs".
    Provided by juggler/bundles/ps_search2/modules-available/hw_errs.sh
    """
    host = "storage"
    service = "hw_errs"
    local_service = "hw_errs"
    severity = Severity.MAJOR
    description = "Check kernel reported errors on servers"


class StorageMeta(PassiveCheck):
    """Passive check "storage:META" fed by the host-side "META" service."""
    host = "storage"
    service = "META"
    local_service = "META"
    severity = Severity.MAJOR


class StorageCheckSkynet(PassiveCheck):
    """
    Passive check "storage:check_skynet".
    Provided by juggler/bundles/ps_search2/modules-available/check_skynet.pl
    """
    host = "storage"
    service = "check_skynet"
    local_service = "check_skynet"
    severity = Severity.MINOR
    description = "Check SkyNet services"


class StorageUnreachable(ActiveCheck):
    """Active `icmpping` check "storage:UNREACHABLE"."""
    host = "storage"
    service = "UNREACHABLE"
    active = "icmpping"
    severity = Severity.MAJOR


class StorageErrataUpdates(PassiveCheck):
    """
    Passive check "storage:errata_updates" with a long TTL (14400).
    Provided by juggler/bundles/ps_search2/modules-available/errata_updates.py
    """
    host = "storage"
    service = "errata_updates"
    local_service = "errata_updates"
    ttl = 14400
    tags = ["security-updates"]
    severity = Severity.MINOR
    description = "Check if there are security updates available"


# Client checks
class ClientUnispace(PassiveCheck):
    """
    Passive check "client:unispace".
    Provided by juggler/bundles/ps_search2/modules-available/unispace.pl
    """
    host = "client"
    service = "unispace"
    local_service = "unispace"
    severity = Severity.MINOR
    description = "Checks all available space on all local filesystems and alert when the usage exceeds the limit"


class ClientNTPStratum(PassiveCheck):
    """
    Passive check "client:ntp_stratum".
    Provided by juggler/bundles/ps_search2/modules-available/ntp_stratum.sh
    """
    host = "client"
    service = "ntp_stratum"
    local_service = "ntp_stratum"
    severity = Severity.MINOR
    description = "Check ntp server synchronization"


class ClientUnreachable(ActiveCheck):
    """Active `icmpping` check "client:UNREACHABLE"."""
    host = "client"
    service = "UNREACHABLE"
    active = "icmpping"
    severity = Severity.MAJOR


class ClientUnreachableDisaster(ActiveCheck):
    """Active `icmpping` check "client:UNREACHABLE_DISASTER" (disaster severity)."""
    host = "client"
    service = "UNREACHABLE_DISASTER"
    active = "icmpping"
    severity = Severity.DISASTER


class ClientMeta(PassiveCheck):
    """Passive check "client:META" fed by the host-side "META" service."""
    host = "client"
    service = "META"
    local_service = "META"
    severity = Severity.MAJOR


# Aggregates

class Aggregator(object):
    """
    Describes how the states of child hosts are folded into one aggregate check.

    Docs: https://docs.yandex-team.ru/juggler/aggregates/aggregators

    :param aggregator: aggregator name (e.g. "logic_or", "logic_and")
    :param children: either a single host/group name (string) or a list of child dictionaries
    :param kwargs: passed through verbatim as `aggregator_kwargs`
    """
    def __init__(self, aggregator, children, **kwargs):
        self.aggregator = aggregator
        self.aggregator_kwargs = kwargs
        # A bare string is shorthand for a single child with only `host` set.
        self.children = [dict(host=children)] if isinstance(children, str) else children

    def to_dict(self, check):
        """
        Render the aggregator section for the check class `check`.

        Children lacking `service` get the check's `local_service` (passive checks) or `service`
        (all others); children lacking `type` get "CGROUP". The stored children and keyword arguments
        are never modified: every call returns fresh copies, so one aggregator (or one list of
        children) may safely be rendered for several different checks.

        :return: dictionary with `aggregator`, `aggregator_kwargs` and `children` keys
        """
        rendered_children = []
        for child in self.children:
            rendered = dict(child)
            rendered.setdefault(
                "service", check.local_service if issubclass(check, PassiveCheck) else check.service
            )
            rendered.setdefault("type", "CGROUP")
            rendered_children.append(rendered)

        return dict(
            aggregator=self.aggregator,
            aggregator_kwargs=dict(self.aggregator_kwargs),
            children=rendered_children
        )

    @staticmethod
    def _remove_empty_valued_keys(dictionary):
        """Return a copy of `dictionary` without the keys whose value is None."""
        return {
            k: v for k, v in dictionary.items() if v is not None
        }


class DayNightTimedLimitAggregator(Aggregator):
    """
    "timed_more_than_limit_is_problem" aggregator with two limit sets covering Monday through Sunday:
    one from DAY_START to DAY_END and one from DAY_END to DAY_START. Thresholds left as None are omitted.
    """
    def __init__(self, children, day_warn=None, day_crit=None, night_warn=None, night_crit=None, **kwargs):
        day_limits = self._remove_empty_valued_keys({
            "warn": day_warn,
            "crit": day_crit,
            "day_start": MONDAY,
            "day_end": SUNDAY,
            "time_start": int(DAY_START),
            "time_end": int(DAY_END),
        })
        night_limits = self._remove_empty_valued_keys({
            "warn": night_warn,
            "crit": night_crit,
            "day_start": MONDAY,
            "day_end": SUNDAY,
            "time_start": int(DAY_END),
            "time_end": int(DAY_START),
        })
        super(DayNightTimedLimitAggregator, self).__init__(
            aggregator="timed_more_than_limit_is_problem",
            children=children,
            limits=[day_limits, night_limits],
            **kwargs
        )


# Notifications

class Notification(object):
    """
    A notification template name together with its keyword arguments.

    Calling an instance returns a new plain `Notification` with the same template name and a deep copy
    of the arguments, overridden by the keyword arguments of the call; the original stays untouched.
    """
    def __init__(self, template_name, **template_kwargs):
        self.template_name = template_name
        self.template_kwargs = template_kwargs

    def __call__(self, **template_kwargs_update):
        merged = dict(copy.deepcopy(self.template_kwargs), **template_kwargs_update)
        return Notification(self.template_name, **merged)

    def to_dict(self):
        """Return `{"template_name": ..., "template_kwargs": ...}` with a deep copy of the arguments."""
        return {
            "template_name": self.template_name,
            "template_kwargs": copy.deepcopy(self.template_kwargs),
        }


class OnStatusChangeNotification(Notification):
    """
    "on_status_change" notification whose recipients (`login`) are derived from the delivery methods.

    :param method: list of delivery methods (e.g. "sms", "email", "phone", "telegram")
    :param status: list of statuses / status transitions that trigger the notification
    :param kwargs: any extra template arguments
    """
    template_name = "on_status_change"

    def __init__(self, method, status, **kwargs):
        recipients = self._get_login_for_methods(method)
        super(OnStatusChangeNotification, self).__init__(
            self.template_name,
            method=method,
            status=status,
            login=recipients,
            **kwargs
        )

    @staticmethod
    def _get_login_for_methods(methods):
        """Pick recipients: duty logins for "phone", the Telegram chat for "telegram", else the whole team."""
        # Order matters: "phone" wins over "telegram" when both are present.
        for marker, logins in (("phone", DUTY_LOGINS), ("telegram", TELEGRAM_CHAT)):
            if marker in methods:
                return logins
        return SANDBOX_TEAM


# SMS + email on entering CRIT and on leaving CRIT for OK or WARN.
on_status_change_common_critical_notification = OnStatusChangeNotification(
    status=[
        CheckStatus.CRIT,
        {"from": CheckStatus.CRIT, "to": CheckStatus.OK},
        {"from": CheckStatus.CRIT, "to": CheckStatus.WARN}
    ],
    method=["sms", "email"]
)

# Email on CRIT only.
on_status_change_critical_only_notification_email = OnStatusChangeNotification(
    status=[CheckStatus.CRIT],
    method=["email"]
)

# SMS on CRIT only.
on_status_change_critical_only_notification_sms = OnStatusChangeNotification(
    status=[CheckStatus.CRIT],
    method=["sms"]
)

# Email on WARN only.
on_status_change_common_warning_notification = OnStatusChangeNotification(
    status=[CheckStatus.WARN],
    method=["email"]
)

# Docs: https://docs.yandex-team.ru/juggler/notifications/escalations#parameters
# This will only work for checks in critical status in the following manner:
# - on CRIT, wait for {delay} seconds;
# - repeat the following cycle for {repeat} times:
#   - try calling a person for {call_tries} times with 50s interval if they're not absent or on vacation;
#   - on success, wait {on_success_next_call_delay} seconds and move on to the next login;
#   - on failure, move on to the next login in 2 minutes.
# The cycle breaks if and only if the check leaves CRIT;
# Note that successful calls do not do anything: you have to set downtime or fix the reason of CRIT.
#
# The login list is the duty logins followed by the whole team in random order; the shuffle happens
# once, at import time, so the rendered order differs between runs.
critical_call_escalation_notification = Notification(
    template_name="phone_escalation",
    logins=DUTY_LOGINS + random.sample(SANDBOX_TEAM, len(SANDBOX_TEAM)),
    delay=60,
    repeat=1,
    call_tries=2,
    on_success_next_call_delay=1800
)

# Same escalation, limited to the DAY_START..DAY_END time window (any day of the week).
critical_call_escalation_notification_daytime = critical_call_escalation_notification(
    time_start=str(DAY_START),
    time_end=str(DAY_END),
)

# Same escalation, limited to DAY_START..DAY_END on Monday through Friday.
critical_call_escalation_notification_worktime = critical_call_escalation_notification(
    time_start=str(DAY_START),
    time_end=str(DAY_END),
    day_start=MONDAY,
    day_end=FRIDAY,
)

# NOTE(review): despite the "_worktime" suffix this derives from the *daytime* template, so it carries
# no Monday-Friday restriction -- confirm whether `..._worktime(delay=3600)` was intended.
critical_delayed_call_escalation_notification_worktime = critical_call_escalation_notification_daytime(
    delay=3600,
)

# Phone call to the duty logins on CRIT within the DAY_START..DAY_END window, with `ignore_weekends`
# set and a 45-minute `repeat` / `min_interval`.
duty_call_no_weekends = OnStatusChangeNotification(
    status=[CheckStatus.CRIT],
    method=["phone"],
    day_start=MONDAY,
    day_end=SUNDAY,
    time_start=str(DAY_START),
    time_end=str(DAY_END),
    ignore_weekends=True,
    repeat=60 * 45,
    min_interval=60 * 45
)

# Phone call to the duty logins on CRIT, without any time restriction.
duty_call_notification = OnStatusChangeNotification(
    status=[CheckStatus.CRIT],
    method=["phone"]
)

# Startrek ticket in the SANDBOXALERT queue, assigned to the duty login.
startrek_notification = Notification(
    template_name="startrek",
    queue="SANDBOXALERT",
    assignee=DUTY_LOGIN,
)


# Checks bundles

def mongod_checks_generator():
    """Yield one mongod availability ``Check`` per port in 37001..37004.

    For ports 37003 and 37004 the aggregator children are the SANDBOX_MONGO hosts
    restricted to each DC listed in EXTRA_SHARDED_MONGO_DCS; for the other ports
    the whole SANDBOX_MONGO group is used.

    A fresh ``MongodAvailability`` class is created on every iteration, so each
    yielded check carries its own host, description and local service name.
    """
    dc_scoped_ports = (37003, 37004)

    for port in range(37001, 37005):
        if port not in dc_scoped_ports:
            hosts = SANDBOX_MONGO
        else:
            hosts = [
                dict(host="{}@dc={}".format(SANDBOX_MONGO, dc))
                for dc in EXTRA_SHARDED_MONGO_DCS
            ]

        class MongodAvailability(PassiveCheck):
            host = "mongod_{}".format(port)
            service = "down"
            tags = ["mongo", "mongod"]
            description = (
                "Check that all shards of MongoDB with number {} are up.".format(port)
            )
            local_service = "mongodbs-{}".format(port)
            severity = Severity.CRITICAL

        availability_aggregator = Aggregator(aggregator="logic_or", children=hosts)

        yield Check(
            check=MongodAvailability,
            notifications=[
                critical_call_escalation_notification,
                on_status_change_critical_only_notification_email,
            ],
            aggregator=availability_aggregator,
            force_ok=[ServerUnreachable],
        )


# Checks that `registry` places in front of production_checks for the production namespace.
common_checks = [
    Check(check=CheckSemaphoresConsistency, notifications=[
        on_status_change_common_critical_notification,
        on_status_change_common_warning_notification,
    ]),
    Check(check=CheckRegularAcceptance, notifications=[
        on_status_change_critical_only_notification_email,
    ]),
]

# Checks for the production namespace (`registry` puts common_checks in front of this list).
# The static entries below are followed by the checks produced by mongod_checks_generator()
# and dead_clients_checks_generator().
# NOTE(review): `force_ok=[...]` presumably forces a check to OK while the listed checks are
# failing - confirm against the Check implementation.
production_checks = [
    Check(
        check=ReportingServersReachability,
        notifications=[
            on_status_change_critical_only_notification_email,
            critical_call_escalation_notification
        ],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=2,
            children=REPORTING_SERVERS,
        )
    ),
    Check(
        check=EmergencyChannel,
        notifications=[
            # SMS only on the OK -> WARN and WARN -> CRIT transitions.
            OnStatusChangeNotification(
                status=[
                    {"from": CheckStatus.OK, "to": CheckStatus.WARN},
                    {"from": CheckStatus.WARN, "to": CheckStatus.CRIT},
                ],
                method=["sms"]
            ),
            critical_call_escalation_notification,
        ]
    ),
    Check(
        check=TasksAutodeploy,
        notifications=[
            critical_call_escalation_notification_daytime,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=SandboxSLO,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email,
            on_status_change_common_warning_notification
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=CheckProxyHttpErrors,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=CheckProxyHttpOk,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=CheckProxyWorkerCount,
        notifications=[
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=CheckProxyCertificateOk,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_or",
            # The only child is the proxy itself, addressed as a single host.
            children=[dict(
                host=SANDBOX_PROXY,
                type="HOST",
            )]
        ),
    ),
    Check(
        check=CheckMetricsWakeupDuration,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_common_warning_notification
        ]
    ),
    Check(
        check=CheckApiRequestsInProgress,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_common_warning_notification
        ]
    ),
    Check(
        check=CheckApiRequestsRejected,
        notifications=[
            critical_call_escalation_notification
        ]
    ),
    Check(
        check=CheckMongodbOpsDuration,
        notifications=[
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=CheckMongodbActiveWriters,
        notifications=[
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email,
        ]
    ),
    Check(
        check=FastboneSandbox,
        notifications=[
            duty_call_notification,
            on_status_change_critical_only_notification_email,
        ],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=70,
            night_warn="80%",
            night_crit="100%",
            children=SANDBOX_LINUX_CLIENTS,
            nodata_mode="skip"
        ),
    ),
    Check(
        check=AssignedTasksAge,
        notifications=[
            critical_call_escalation_notification_worktime,
            startrek_notification,
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=EnqueueingTasksAge,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=ExecutingTasksYabsYtstat,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=MongoCfgAvailability,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        # Only the crit limits are passed; the warn limits are left to the aggregator.
        aggregator=DayNightTimedLimitAggregator(
            day_crit=1,
            night_crit="100%",
            children=SANDBOX_MONGO_CONFIG_SERVERS
        ),
        force_ok=[ReportingServersReachability],
    ),
    Check(
        check=ServiceProcessesDelay,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="more_than_limit_is_crit",
            children=REPORTING_SERVERS,
            limit=2
        ),
        force_ok=[ReportingServersReachability],
        # The pre-production list registers the same check with "PRE_PRODUCTION".
        check_options={
            "args": ["PRODUCTION"]
        },
    ),
    Check(
        check=ServiceQActivity,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=ServiceQCPU,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=MdsCleanerDelay,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=QueueAutocheckPrecommits,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=QueueAutocheckPostcommits,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=RejectedTasks,
        notifications=[
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=Coredumps,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            # Same host group as REPORTING_SERVERS, which is an alias of this constant.
            children=SANDBOX_MONGO_CONFIG_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=StepUnprocessedEvents,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=TaskSessionConsistency,
        notifications=[
            critical_delayed_call_escalation_notification_worktime,
            on_status_change_critical_only_notification_email,
            startrek_notification,
        ]
    ),
    Check(
        check=TasksTransientStatuses,
        notifications=[
            critical_call_escalation_notification_daytime,
            on_status_change_common_warning_notification,
            startrek_notification,
        ]
    ),
    Check(
        check=XMLRPCExceptionCount,
        notifications=[
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    Check(
        check=ZookeeperNodes,
        notifications=[
            critical_call_escalation_notification,
            on_status_change_critical_only_notification_sms,
            on_status_change_critical_only_notification_email
        ],
        aggregator=Aggregator(
            aggregator="logic_and",
            children=REPORTING_SERVERS
        ),
        force_ok=[ReportingServersReachability]
    ),
    # Server checks
    Check(
        check=ServerCPULoad,
        notifications=[critical_call_escalation_notification],
        aggregator=Aggregator(
            aggregator="more_than_limit_is_crit",
            children=SANDBOX_SERVERS,
            limit=6
        )
    ),
    Check(
        check=ServerHWErrs,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=3,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerHTTP,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=2,
            day_crit=4,
            night_warn=2,
            night_crit=4,
            children=SANDBOX_API,
        )
    ),
    Check(
        check=ServerRAID,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=3,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerNTPStratum,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=20,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerFileserver,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=5,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        # (sic) the check class name is spelled "Zookeper".
        check=ServerZookeperMntr,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=2,
            children=SANDBOX_ZOOKEEPER,
        )
    ),
    Check(
        check=ServerZookeeper,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=2,
            children=SANDBOX_ZOOKEEPER,
        )
    ),
    Check(
        check=ServerSSH,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=3,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerUnreachable,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=2,
            children=SANDBOX_SERVERS,
        )
    ),
    # No notifications are listed explicitly for this check.
    Check(
        check=ServerUnreachableDisaster,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem",
            children=SANDBOX_SERVERS,
            mode="percent",
            warn_limit=33,
            crit_limit=33,
        )
    ),
    Check(
        check=ServerCheckSkynet,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=4,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerUnispace,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=3,
            children=SANDBOX_SERVERS,
        )
    ),
    Check(
        check=ServerErrataUpdates,
        notifications=[critical_call_escalation_notification],
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem",
            children=SANDBOX_SERVERS,
            mode="percent",
            warn_limit=1,
            crit_limit=100,
            short_stats=False,
        ),
        force_ok=[ServerUnreachable]
    ),
    # Server Step checks
    Check(
        check=ServerStepClockEvent,
        notifications=[critical_call_escalation_notification],
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem",
            warn_limit=1,
            crit_limit=2,
            # NOTE(review): aggregates over SANDBOX_MONGO rather than SANDBOX_SERVERS - confirm intended.
            children=SANDBOX_MONGO,
        )
    ),
    # Storage checks
    Check(
        check=StorageRAID,
        notifications=[critical_call_escalation_notification],
        aggregator=Aggregator(
            aggregator="logic_or",
            children=SANDBOX_STORAGES
        ),
        force_ok=[StorageUnreachable, StorageMeta]
    ),
    Check(
        check=StorageFileserver,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=2,
            night_warn="10%",
            night_crit="50%",
            children=SANDBOX_STORAGES,
        )
    ),
    Check(
        check=StorageHWErrors,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=3,
            night_warn="10%",
            night_crit="100%",
            children=SANDBOX_STORAGES,
        )
    ),
    Check(
        check=StorageMeta,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn="10%",
            night_crit="50%",
            children=SANDBOX_STORAGES,
        )
    ),
    Check(
        check=StorageCheckSkynet,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=2,
            night_warn="10%",
            night_crit="100%",
            children=SANDBOX_STORAGES,
        )
    ),
    Check(
        check=StorageUnreachable,
        notifications=[critical_call_escalation_notification],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=1,
            night_warn=1,
            night_crit=4,
            children=SANDBOX_STORAGES,
        )
    ),
    Check(
        check=StorageErrataUpdates,
        notifications=[critical_call_escalation_notification],
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem",
            children=SANDBOX_STORAGES,
            mode="percent",
            warn_limit=1,
            crit_limit=100,
            short_stats=False,
        ),
        force_ok=[StorageUnreachable]
    ),
    # Client checks
    Check(
        check=ClientUnispace,
        notifications=[critical_call_escalation_notification_daytime],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit="33%",
            night_warn="80%",
            night_crit="100%",
            children=SANDBOX_CLIENTS,
        )
    ),
    Check(
        check=ClientNTPStratum,
        notifications=[critical_call_escalation_notification_daytime],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=5,
            day_crit="50%",
            night_warn="80%",
            night_crit="100%",
            children=SANDBOX_LINUX_CLIENTS,
        )
    ),
    Check(
        check=ClientUnreachable,
        notifications=[critical_call_escalation_notification_daytime],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=10,
            day_crit=70,
            night_warn="50%",
            night_crit="80%",
            children=SANDBOX_CLIENTS,
        )
    ),
    # No notifications are listed explicitly for this check.
    Check(
        check=ClientUnreachableDisaster,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem",
            children=SANDBOX_CLIENTS,
            mode="percent",
            warn_limit=80,
            crit_limit=80,
        )
    ),
    Check(
        check=ClientMeta,
        notifications=[critical_call_escalation_notification_daytime],
        aggregator=DayNightTimedLimitAggregator(
            day_warn=1,
            day_crit=50,
            night_warn="80%",
            night_crit="100%",
            children=SANDBOX_LINUX_CLIENTS,
        )
    )
] + list(mongod_checks_generator()) + list(dead_clients_checks_generator())

# Checks for the pre-production namespace; none of the static entries lists notifications.
# The static entries are followed by the checks from dead_clients_checks_generator_preprod().
pre_production_checks = [
    Check(
        check=ReportingServersReachability,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_crit", limit=1, children=REPORTING_SERVERS_PREPROD,
        ),
    ),
    Check(
        check=ClientUnispace,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem", children=SANDBOX_PREPROD_CLIENTS,
            mode="percent", warn_limit=1, crit_limit=33,
        ),
    ),
    Check(
        check=ClientUnreachable,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_problem", children=SANDBOX_PREPROD_CLIENTS,
            mode="percent", warn_limit=1, crit_limit=33,
        ),
    ),
    Check(
        check=ServiceProcessesDelay,
        aggregator=Aggregator(
            aggregator="more_than_limit_is_crit", children=REPORTING_SERVERS_PREPROD, limit=2,
        ),
        force_ok=[ReportingServersReachability],
        # The production list registers the same check with "PRODUCTION".
        check_options={"args": ["PRE_PRODUCTION"]},
    ),
] + list(dead_clients_checks_generator_preprod())

# Namespace lookup used by upsert_checks(): PRODUCTION_KEY / PRE_PRODUCTION_KEY -> Namespace.
registry = {
    PRODUCTION_KEY: Namespace(
        name=PRODUCTION_NAMESPACE,
        checks=common_checks + production_checks,
    ),
    PRE_PRODUCTION_KEY: Namespace(
        name=PRE_PRODUCTION_NAMESPACE,
        checks=pre_production_checks,
    ),
}


def upsert_checks(key, token):
    """Upload every check of the namespace selected by ``key`` to the Juggler API.

    The namespace is looked up in ``registry`` by ``key % 2``, so even keys map to
    PRODUCTION_KEY (0) and odd keys to PRE_PRODUCTION_KEY (1). Nothing is done when
    the lookup yields no namespace.

    :param key: integer used to select the namespace.
    :param token: credentials passed as ``auth`` to the REST client.
    :raises Exception: whatever the upload of a check raised; the offending check
        is logged as JSON before the exception is re-raised.
    """
    # Imported lazily so the module can be loaded where `libs.sandbox` is unavailable.
    # noinspection PyUnresolvedReferences
    from libs.sandbox import rest

    namespace = registry.get(key % 2)
    if not namespace:
        return

    juggler = rest.Client(base_url=API_URL, auth=token)
    for check in namespace.get_checks():
        try:
            # noinspection PyArgumentList
            juggler.checks["add_or_update?do=1"](**check)
        except Exception:
            # Log the exact payload that failed, then let the caller handle the error.
            logging.error("Failed to upload juggler check: %s", json.dumps(check, indent=4))
            raise
