import argparse
import functools
import json
import juggler_sdk
import logging
import sys

from infra.yp_service_discovery.monitoring.solomon.src.util.solomon_api import SolomonApi


# Component type (ctype) identifiers.
# NOTE(review): presumably each ctype selects the `ctype_config` dict that the
# functions below receive — confirm against the caller outside this chunk.
CTYPE_AUTH = 'auth-yp-dns'
CTYPE_NS_CACHE = 'ns-cache'
CTYPE_DNS_CACHE = 'dns-cache'

# All known ctypes.
CTYPES = [
    CTYPE_AUTH,
    CTYPE_NS_CACHE,
    CTYPE_DNS_CACHE,
]

# Solomon project: default 'projectId' of alert configs and part of the alert URL.
SOLOMON_PROJECT_ID = 'yp_dns'

# Namespace set on every generated Juggler check.
JUGGLER_NAMESPACE = 'yp.dns'

# Notification levels; a level is used as the key into
# notify_config['logins'] and notify_config['escalations'].
WARN = 'warn'
CRIT = 'crit'

# Per-YP-cluster alert tuning.
# 'alert_params' keys read in this file:
#   * 'objects_number_usage_threshold' — percent of the object-number limit
#     (falls back to 60 when absent);
#   * 'sst_files_size_usage_threshold' — percent of the allowed sst files size
#     (falls back to 60 when absent; no cluster overrides it here).
YP_CLUSTERS = {
    'sas-test': {
        'alert_params': {
        },
    },
    'sas': {
        'alert_params': {
            'objects_number_usage_threshold': 70,
        },
    },
    'man-pre': {
        'alert_params': {
        },
    },
    'man': {
        'alert_params': {
            'objects_number_usage_threshold': 95,
        },
    },
    'vla': {
        'alert_params': {
            'objects_number_usage_threshold': 90,
        },
    },
    'iva': {
        'alert_params': {
            'objects_number_usage_threshold': 85,
        },
    },
    'myt': {
        'alert_params': {
            'objects_number_usage_threshold': 85,
        },
    },
    'xdc': {
        'alert_params': {
            'objects_number_usage_threshold': 96,
        },
    },
}

# Datacenter names.
# NOTE(review): not referenced in the visible part of the file; the alert
# builders read ctype_config['datacenters'] instead — confirm where this is used.
DATACENTERS = [
    'sas',
    'man',
    'vla',
    'msk',
]

# Juggler checks ("host:service") used as the default 'unreach_checks' of the
# generated aggregates.
SOLOMON_LAG_CHECKS = [
    'solomon:yp.dns.solomon_lag',
]

# Multiplier applied to per-host thresholds of the alerts grouped by the
# 'host' label (queries, queue size, overload drops).
HOSTS_PER_DC = 6

# DNS zones -> record type -> alert parameters.
# 'empty_perc_threshold' is read by get_empty_responses_alert (default 95 when
# the record type has no entry). Only the qloud-d zones configure 'SRV' in
# addition to 'AAAA'.
ZONES = {
    'sas-test.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'yp-test.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'man-pre.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'sas.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'man.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'vla.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'myt.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'iva.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'xdc.yp-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'gencfg-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'in.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'in.yandex-team.ru': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'stable.qloud-b.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'prestable.qloud-b.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'test.qloud-b.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'qloud-c.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
    },
    'stable.qloud-d.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
        'SRV': {
            'empty_perc_threshold': 95,
        },
    },
    'prestable.qloud-d.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
        'SRV': {
            'empty_perc_threshold': 95,
        },
    },
    'test.qloud-d.yandex.net': {
        'AAAA': {
            'empty_perc_threshold': 95,
        },
        'SRV': {
            'empty_perc_threshold': 95,
        },
    },
}


def warden_enabled(ctype_config, filter_args):
    """Tell whether warden tagging applies for the given filter values.

    Returns False when ctype_config['warden']['enable'] is missing or falsy,
    or when any (key, value) pair of `filter_args` has a truthy entry in
    ctype_config['warden']['disable_filters'][key][value]; True otherwise.
    """
    warden_cfg = ctype_config.get('warden', {})
    if not warden_cfg.get('enable', False):
        return False

    disable_filters = warden_cfg.get('disable_filters', {})
    return not any(
        disable_filters.get(name, {}).get(value, False)
        for name, value in filter_args.items()
    )


def milliseconds(seconds=0, minutes=0):
    """Convert a duration given as seconds plus minutes into milliseconds."""
    from_seconds = seconds * 1000
    from_minutes = minutes * 60 * 1000
    return from_seconds + from_minutes


def get_juggler_host(ctype_config, dc):
    """Return the Juggler host name '<dc>.<host_base>' for the given ctype config."""
    host_base = ctype_config['juggler_params']['host_base']
    return '{dc}.{base}'.format(dc=dc, base=host_base)


def get_nanny_service_by_juggler_host(ctype_config, host):
    """Return the Nanny services that back the given Juggler host.

    The datacenter is the part of `host` before the first dot. For the
    aggregate host ('all.<...>') every distinct service from the
    'dc_to_service' mapping is returned; otherwise only the service of that
    datacenter.

    Returns:
        A list of service names, or None when the nanny_service config has
        no 'dc_to_service' mapping.

    Raises:
        KeyError: if the datacenter is not present in 'dc_to_service'.
    """
    nanny_service_cfg = ctype_config['juggler_params']['urls']['nanny_service']
    if 'dc_to_service' not in nanny_service_cfg:
        return None

    dc_to_service = nanny_service_cfg['dc_to_service']
    dc = host.split('.', 1)[0]
    if dc == 'all':
        # De-duplicate, but return a sorted list: iterating a set of strings
        # yields a different order from run to run (hash randomisation), which
        # would reshuffle the generated URL list on every generation.
        return sorted(set(dc_to_service.values()))
    return [dc_to_service[dc]]


def get_solomon_lag_checks():
    """Return the module-level SOLOMON_LAG_CHECKS list (the list object itself, not a copy)."""
    return SOLOMON_LAG_CHECKS


def get_yp_downtime_tags(yp_cluster):
    """Return the two downtime tags: the common one and the one specific to `yp_cluster`."""
    common_tag = 'yp-downtime-9b8ee2a2b559b3afff751623e5e79546'
    cluster_tag = 'yp-{}-downtime-9b8ee2a2b559b3afff751623e5e79546'.format(yp_cluster)
    return [common_tag, cluster_tag]


def get_yp_unreach_checks(yp_cluster):
    """Return a one-element list with the auto-downtime check ("host:service") of `yp_cluster`."""
    auto_downtime_check = 'yp-{}.yp.yandex.net:yp.infra.auto_downtime_'.format(yp_cluster)
    return [auto_downtime_check]


def get_yp_standard_unreach_checks(yp_cluster):
    """Return a new list: the YP auto-downtime checks of `yp_cluster` followed by the Solomon lag checks."""
    checks = list(get_yp_unreach_checks(yp_cluster))
    checks.extend(get_solomon_lag_checks())
    return checks


def get_solomon_alert_url(alert_id):
    """Return the url/title dict that links to the alert in the Solomon admin UI."""
    alert_link = 'https://solomon.yandex-team.ru/admin/projects/{}/alerts/{}'.format(SOLOMON_PROJECT_ID, alert_id)
    return {'url': alert_link, 'title': 'Alert on solomon'}


def get_nanny_service_url(service, is_adm=False):
    """Return the url/title/type dict that links to a Nanny service.

    With `is_adm` truthy the link points at the 'adm-nanny' host instead of 'nanny'.
    """
    host_prefix = 'adm-' if is_adm else ''
    service_link = 'https://{}nanny.yandex-team.ru/ui/#/services/catalog/{}'.format(host_prefix, service)
    link = {
        'url': service_link,
        'title': 'Nanny: {}'.format(service),
        'type': 'nanny',
    }
    return link


def get_docs_url():
    """Return the url/title/type dict that links to the wiki page describing the alerts."""
    docs_link = dict(
        url='https://wiki.yandex-team.ru/ypdns/monitoring/#opisaniealertov',
        title='Описание алертов',
        type='wiki',
    )
    return docs_link


def get_source_url():
    """Return the url/title dict that links to the source code of the alert generator."""
    source_link = dict(
        url='https://a.yandex-team.ru/arc/trunk/arcadia/infra/yp_dns/monitoring/alerts',
        title='Alerts generation source code',
    )
    return source_link


def get_standard_urls(alert_id):
    """Return the links attached to every check: Solomon alert, docs, source code (in this order)."""
    urls = [get_solomon_alert_url(alert_id)]
    urls.append(get_docs_url())
    urls.append(get_source_url())
    return urls


def notification(status, logins, notify_methods):
    """Build an 'on_status_change' Juggler notification.

    `status` is either a status string or a {'from': ..., 'to': ...} dict.
    When the (target) status is "CRIT", a 'repeat' of 1800 is added to the
    template kwargs.
    """
    if isinstance(status, str):
        goes_to_crit = status == "CRIT"
    elif isinstance(status, dict):
        goes_to_crit = status["to"] == "CRIT"
    else:
        goes_to_crit = False

    template_kwargs = dict(status=status, login=logins, method=notify_methods)
    if goes_to_crit:
        template_kwargs["repeat"] = 1800  # 30 min

    return juggler_sdk.NotificationOptions(
        template_name="on_status_change",
        template_kwargs=template_kwargs,
    )


def get_notifications_config(notify_config, level=CRIT, include_escalation=True):
    """Build the list of Juggler notifications for the given level.

    Logins are taken from notify_config['logins'][level] and
    notify_config['escalations'][level]. With non-empty logins, status-change
    notifications are added for OK->CRIT and WARN->CRIT (sms + telegram) and
    CRIT->OK (telegram only). With `include_escalation` and non-empty
    escalation logins, a 'phone_escalation' notification is appended.
    """
    logins = notify_config['logins'][level]
    escalation_logins = notify_config['escalations'][level]

    # (from, to, notify methods), in the order the notifications are emitted.
    status_changes = (
        ('OK', 'CRIT', ['sms', 'telegram']),
        ('WARN', 'CRIT', ['sms', 'telegram']),
        ('CRIT', 'OK', ['telegram']),
    )

    result = []
    if logins:
        for old_status, new_status, methods in status_changes:
            result.append(notification(
                status={'from': old_status, 'to': new_status},
                logins=logins,
                notify_methods=methods,
            ))

    if include_escalation and escalation_logins:
        escalation_kwargs = {
            'delay': 60,
            'logins': escalation_logins,
        }
        result.append(juggler_sdk.NotificationOptions(
            template_name='phone_escalation',
            template_kwargs=escalation_kwargs,
        ))

    return result


def get_juggler_check(ctype_config, solomon_alert, host, service, notification_level=CRIT, notification_with_escalation=True,
                      unreach_checks=SOLOMON_LAG_CHECKS, notify_params=None, flaps_config=None, tags=(), meta=None):
    """Build one Juggler check for a Solomon alert.

    Args:
        ctype_config: per-ctype config; 'notifications' and 'juggler_params' are read.
        solomon_alert: alert config dict; its 'id' is used for the Solomon link.
        host: Juggler host of the check.
        service: Juggler service of the check.
        notification_level: key into the notification logins/escalations.
        notification_with_escalation: whether to add the phone escalation.
        unreach_checks: checks passed as the aggregator's 'unreach_checks'.
        notify_params: notification config; defaults to
            ctype_config['notifications']['default'].
        flaps_config: flap options passed through to the check.
        tags: tags passed through to the check.
        meta: extra check meta. It is copied, so the caller's dict is never
            modified; None means "no extra meta".

    Returns:
        The juggler_sdk.Check with meta['urls'] filled with the standard links
        plus, when configured, links to the Nanny services of the host.
    """
    if notify_params is None:
        notify_params = ctype_config['notifications']['default']

    # Always hand a fresh dict to the check: 'urls' is written into check.meta
    # below, and a shared default (or caller-owned) dict could otherwise leak
    # one check's urls into another.
    meta = {} if meta is None else dict(meta)

    check = juggler_sdk.Check(
        host=host,
        service=service,
        namespace=JUGGLER_NAMESPACE,
        aggregator='logic_or',
        aggregator_kwargs={
            'unreach_checks': unreach_checks,
        },
        notifications=get_notifications_config(notify_params, notification_level, notification_with_escalation),
        flaps_config=flaps_config,
        tags=tags,
        meta=meta,
    )

    urls = get_standard_urls(solomon_alert['id'])
    juggler_urls_cfg = ctype_config['juggler_params']['urls']
    if 'nanny_service' in juggler_urls_cfg:
        nanny_services = get_nanny_service_by_juggler_host(ctype_config, check.host)
        assert nanny_services is not None, "'nanny_service' config has no 'dc_to_service' mapping"
        is_adm = juggler_urls_cfg['nanny_service'].get('is_adm', False)
        urls.extend(get_nanny_service_url(nanny_service, is_adm) for nanny_service in nanny_services)
    check.meta['urls'] = urls

    return check


def get_juggler_check_with_yp_downtime(ctype_config, solomon_alert, yp_cluster=None, **kwargs):
    """Build a Juggler check whose unreach checks are the standard ones for `yp_cluster`.

    All other keyword arguments are forwarded to get_juggler_check.
    """
    yp_unreach_checks = get_yp_standard_unreach_checks(yp_cluster)
    return get_juggler_check(
        ctype_config,
        solomon_alert,
        unreach_checks=yp_unreach_checks,
        **kwargs
    )


def get_juggler_checks(func, ctype_config, solomon_alert, **kwargs):
    """Build Juggler checks for an alert by calling `func` once per datacenter.

    When the alert is grouped by the 'host' label, one check is built for
    every entry of ctype_config['datacenters']; otherwise a single check for
    the 'all' pseudo-datacenter. If 'meta' is given, each check gets its own
    copy with string values formatted with dc=<upper-cased datacenter>.
    Remaining keyword arguments are forwarded to `func`.
    """
    grouped_by_host = 'host' in solomon_alert.get('groupByLabels', [])
    datacenters = ctype_config['datacenters'] if grouped_by_host else ['all']

    meta_template = kwargs.get('meta')
    has_meta = 'meta' in kwargs

    checks = []
    for dc in datacenters:
        if has_meta:
            dc_meta = meta_template.copy()
            dc_meta.update({
                key: value.format(dc=dc.upper())
                for key, value in meta_template.items()
                if isinstance(value, str)
            })
            kwargs['meta'] = dc_meta

        juggler_host = get_juggler_host(ctype_config, dc)
        checks.append(func(ctype_config=ctype_config, solomon_alert=solomon_alert, host=juggler_host, **kwargs))
    return checks


def get_sensor(ctype_config, sensor, replace_nan=None, **kwargs):
    """Render a Solomon selector string for a sensor.

    Labels come from `kwargs`, followed by 'sensor' (prefixed with the
    configured global prefix) and, unless already given, 'cluster', 'service'
    and 'host' (all datacenters joined with '|'). When `replace_nan` is not
    None the selector is wrapped into replace_nan(<selector>, <replace_nan>).
    """
    solomon_cfg = ctype_config['solomon']

    labels = dict(kwargs)
    labels['sensor'] = '.'.join((solomon_cfg['global_prefix'], sensor))
    label_defaults = (
        ('cluster', solomon_cfg['cluster']),
        ('service', solomon_cfg['service']),
        ('host', '|'.join(ctype_config['datacenters'])),
    )
    for label_name, label_value in label_defaults:
        labels.setdefault(label_name, label_value)

    rendered_labels = ["{}='{}'".format(name, value) for name, value in labels.items()]
    selector = '{' + ', '.join(rendered_labels) + '}'
    if replace_nan is None:
        return selector
    return 'replace_nan({}, {})'.format(selector, replace_nan)


def make_solomon_alert_id(ctype_config, name):
    """Return the Solomon alert id: `name` with dots replaced by dashes plus the optional configured 'id_suffix'."""
    dashed_name = name.replace('.', '-')
    id_suffix = ctype_config['solomon_alert_params'].get('id_suffix', '')
    return '{}{}'.format(dashed_name, id_suffix)


def get_replica_sensor(ctype_config, sensor, **kwargs):
    """Render a selector for a YP replica sensor (name prefixed with the configured replica 'sensor_prefix')."""
    replica_prefix = ctype_config['yp_replica']['sensor_prefix']
    full_sensor_name = '.'.join((replica_prefix, sensor))
    return get_sensor(ctype_config, full_sensor_name, **kwargs)


def get_pdns_sensor(ctype_config, sensor, **kwargs):
    """Render a selector for a sensor under the 'pdns' prefix."""
    full_sensor_name = '.'.join(('pdns', sensor))
    return get_sensor(ctype_config, full_sensor_name, **kwargs)


def get_backend_sensor(ctype_config, sensor, **kwargs):
    """Render a selector for a sensor under the 'pdns.backend.YP_DNS' prefix."""
    full_sensor_name = '.'.join(('pdns.backend.YP_DNS', sensor))
    return get_sensor(ctype_config, full_sensor_name, **kwargs)


def get_description(descr, ctype_config):
    """Return `descr` prefixed with the optional configured 'description_prefix'."""
    description_prefix = ctype_config['solomon_alert_params'].get('description_prefix', '')
    return '{}{}'.format(description_prefix, descr)


def group_lines(fn, *sensors):
    """Render 'group_lines("<fn>", flatten(to_vector(<s1>), to_vector(<s2>), ...))' for the given sensors."""
    vectors = ['to_vector({})'.format(sensor) for sensor in sensors]
    return 'group_lines("{}", flatten({}))'.format(fn, ', '.join(vectors))


def patch_config(config, ctype_config):
    """Fill in the common fields of a Solomon alert config, in place, and return it.

    * 'projectId' and 'notificationChannels' get defaults when missing;
    * 'annotations' gets 'host' / 'service' / 'ctype', which annotations
      already present in `config` override;
    * 'id' and 'name' are rewritten via make_solomon_alert_id / get_description.

    The 'service' annotation uses the id as it was before the rewrite.
    """
    config.setdefault('projectId', SOLOMON_PROJECT_ID)
    config.setdefault('notificationChannels', ['juggler'])

    grouped_by_host = 'host' in config.get('groupByLabels', [])
    annotations = {
        'host': '{{labels.host}}' if grouped_by_host else 'all',
        'service': config['id'],
        'ctype': ctype_config['juggler_params']['host_base'],
    }
    annotations.update(config.get('annotations', {}))
    config['annotations'] = annotations

    config['id'] = make_solomon_alert_id(ctype_config, config['id'])
    config['name'] = get_description(config['name'], ctype_config)
    return config


def get_storage_age_alert(ctype_config, yp_cluster):
    """Build the storage-age alert of a YP replica and its Juggler checks.

    The Solomon alert is a per-host threshold on the replica's
    'storage.age_indicator' sensor (MIN over 30 s, GTE 1). Checks are built
    with YP downtime unreach checks; flap 'stable' is 1200 for 'xdc' and 600
    for other clusters. Warden tags and a title are added when warden is
    enabled for the cluster.

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.age'.format(yp_cluster)

    threshold_rule = {
        'selectors': get_replica_sensor(ctype_config, 'storage.age_indicator', yp_cluster=yp_cluster),
        'timeAggregation': 'MIN',
        'predicate': 'GTE',
        'threshold': 1,
    }
    alert_config = {
        'id': service_id,
        'name': 'Indicator of storage age of YP-{} replica'.format(yp_cluster.upper()),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=30),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    tags = []
    meta = {}
    if warden_enabled(ctype_config, {'yp_cluster': yp_cluster}):
        warden_cfg = ctype_config['warden']
        functionality_tag = 'warden_functionality_{component}_{service}_yp-to-dns-sync-lag'.format(
            component=warden_cfg['component']['name'],
            service=warden_cfg['service']['name'],
        )
        tags.extend([
            'warden_auto_source',
            'warden_alert_create_spi',
            'warden_alert_start_flow',
            'warden_alert_account_metric',
            'warden_alert_category_boolean',
            functionality_tag,
        ])
        # '{dc}' stays a placeholder here; get_juggler_checks fills it in per datacenter.
        meta['title'] = 'YP {} to {} {{dc}} updates lag'.format(yp_cluster.upper(), warden_cfg['service']['title_name'])

    stable_time = 1200 if yp_cluster == 'xdc' else 600
    flaps_config = juggler_sdk.FlapOptions(stable=stable_time, critical=0, boost=0)

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    juggler_checks = get_juggler_checks(
        get_juggler_check_with_yp_downtime,
        ctype_config,
        solomon_alert,
        service=service_id,
        yp_cluster=yp_cluster,
        notification_level=notification_level,
        flaps_config=flaps_config,
        tags=tags,
        meta=meta,
    )
    return solomon_alert, juggler_checks


def get_max_objects_reached_alert(ctype_config, yp_cluster):
    """Build the "max objects number reached" alert of a YP replica and its Juggler checks.

    The Solomon alert is a threshold on the replica's
    'max_selected_objects_number_reached' sensor (SUM over 60 s, GTE 1).
    Warden tags and a title are added when warden is enabled for the cluster.

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.max_objects_reached'.format(yp_cluster)

    threshold_rule = {
        'selectors': get_replica_sensor(ctype_config, 'max_selected_objects_number_reached', yp_cluster=yp_cluster),
        'timeAggregation': 'SUM',
        'predicate': 'GTE',
        'threshold': 1,
    }
    alert_config = {
        'id': service_id,
        'name': 'Max objects number reached for YP-{} replica'.format(yp_cluster.upper()),
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    tags = []
    meta = {}
    if warden_enabled(ctype_config, {'yp_cluster': yp_cluster}):
        warden_cfg = ctype_config['warden']
        functionality_tag = 'warden_functionality_{component}_{service}_yp-to-dns-sync-lag'.format(
            component=warden_cfg['component']['name'],
            service=warden_cfg['service']['name'],
        )
        tags.extend([
            'warden_auto_source',
            'warden_alert_create_spi',
            'warden_alert_start_flow',
            'warden_alert_account_metric',
            'warden_alert_category_boolean',
            functionality_tag,
        ])
        # '{dc}' stays a placeholder here; get_juggler_checks fills it in per datacenter.
        meta['title'] = 'YP {} to {} {{dc}} record sets number reached limit'.format(yp_cluster.upper(), warden_cfg['service']['title_name'])

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    juggler_checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=service_id,
        notification_level=notification_level,
        meta=meta,
        tags=tags,
    )
    return solomon_alert, juggler_checks


def get_objects_number_limit_usage_alert(ctype_config, yp_cluster):
    """Build the "record sets number close to the limit" alert and its Juggler checks.

    The Solomon alert is an expression over all hosts: max of
    'storage_objects_number' divided by min of 'max_objects_number', checked
    against the cluster's 'objects_number_usage_threshold' percent (60 when
    not configured in YP_CLUSTERS).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.objects_number_limit_usage'.format(yp_cluster)

    all_hosts = ctype_config['solomon_alert_params']['all_hosts_label_value']
    objects_selector = get_replica_sensor(ctype_config, 'storage_objects_number', host=all_hosts, yp_cluster=yp_cluster, column_family='dns_record_set#MAIN#')
    limit_selector = get_replica_sensor(ctype_config, 'max_objects_number', host=all_hosts, yp_cluster=yp_cluster)

    perc_threshold = YP_CLUSTERS[yp_cluster]['alert_params'].get('objects_number_usage_threshold', 60)

    program_lines = [
        'let objects_number = max(series_max({}));'.format(objects_selector),
        'let limit_objects_number = min(series_min({}));'.format(limit_selector),
        'let usage = objects_number / limit_objects_number;',
        'let usage_perc = usage * 100;',
        'let objects_number_pretty = to_fixed(objects_number, 0);',
        'let limit_objects_number_pretty = to_fixed(limit_objects_number, 0);',
        'let usage_perc_pretty = to_fixed(usage_perc, 2);',
    ]
    expression = {
        'program': '\n'.join(program_lines),
        'checkExpression': 'usage > {}'.format(perc_threshold / 100.0),
    }
    alert_config = {
        'id': service_id,
        'name': 'Record sets number in YP-{} more than {}% of limit'.format(yp_cluster.upper(), perc_threshold),
        'type': {'expression': expression},
        'periodMillis': milliseconds(seconds=120),
        'delaySeconds': 30,
        'annotations': {
            'usage': '{{expression.usage_perc_pretty}}%',
            'value': '{{expression.objects_number_pretty}}',
            'limit': '{{expression.limit_objects_number_pretty}}',
        },
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    juggler_checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=service_id,
        notification_level=notification_level,
    )
    return solomon_alert, juggler_checks


def get_max_allowed_storage_space_reached_alert(ctype_config, yp_cluster):
    """Build the "max allowed storage space reached" alert of a YP replica and its Juggler checks.

    The Solomon alert is a threshold on the replica's
    'storage.rocksdb.is_max_allowed_space_reached' sensor (SUM over 60 s, GTE 1).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.max_allowed_storage_space_reached'.format(yp_cluster)

    threshold_rule = {
        'selectors': get_replica_sensor(ctype_config, 'storage.rocksdb.is_max_allowed_space_reached', yp_cluster=yp_cluster),
        'timeAggregation': 'SUM',
        'predicate': 'GTE',
        'threshold': 1,
    }
    alert_config = {
        'id': service_id,
        'name': 'Max allowed storage space reached for YP-{} replica'.format(yp_cluster.upper()),
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    juggler_checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=service_id,
        notification_level=notification_level,
    )
    return solomon_alert, juggler_checks


def get_compactions_exceeds_space_limit_alert(ctype_config, yp_cluster):
    """Build the "storage space limit reached including compactions" alert and its Juggler checks.

    The Solomon alert is a threshold on the replica's
    'storage.rocksdb.is_max_allowed_space_reached_including_compactions'
    sensor (SUM over 60 s, GTE 1). Checks are built without phone escalation
    and with flap options stable=180, critical=540.

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.compactions_exceeds_space_limit'.format(yp_cluster)

    threshold_rule = {
        'selectors': get_replica_sensor(ctype_config, 'storage.rocksdb.is_max_allowed_space_reached_including_compactions', yp_cluster=yp_cluster),
        'timeAggregation': 'SUM',
        'predicate': 'GTE',
        'threshold': 1,
    }
    alert_config = {
        'id': service_id,
        'name': 'Max allowed storage space reached including compactions for YP-{} replica'.format(yp_cluster.upper()),
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    flaps_config = juggler_sdk.FlapOptions(stable=180, critical=540, boost=0)
    juggler_checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=service_id,
        notification_with_escalation=False,
        notification_level=notification_level,
        flaps_config=flaps_config,
    )
    return solomon_alert, juggler_checks


def get_sst_files_size_limit_usage_alert(ctype_config, yp_cluster):
    """Build the "sst files size close to the limit" alert and its Juggler checks.

    The Solomon alert is an expression over all hosts: max of
    'storage.rocksdb.sst_files_size_bytes' divided by min of
    'storage.rocksdb.max_allowed_space_usage' (usage is 0 when the limit is
    0), checked against the cluster's 'sst_files_size_usage_threshold'
    percent (60 when not configured in YP_CLUSTERS).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'yp_replica.{}.sst_files_size_limit_usage'.format(yp_cluster)

    all_hosts = ctype_config['solomon_alert_params']['all_hosts_label_value']
    size_selector = get_replica_sensor(ctype_config, 'storage.rocksdb.sst_files_size_bytes', host=all_hosts, yp_cluster=yp_cluster)
    limit_selector = get_replica_sensor(ctype_config, 'storage.rocksdb.max_allowed_space_usage', host=all_hosts, yp_cluster=yp_cluster)

    perc_threshold = YP_CLUSTERS[yp_cluster]['alert_params'].get('sst_files_size_usage_threshold', 60)

    program_lines = [
        'let sst_files_size = max(series_max({}));'.format(size_selector),
        'let max_allowed_space_usage = min(series_min({}));'.format(limit_selector),
        'let usage = max_allowed_space_usage != 0 ? sst_files_size / max_allowed_space_usage : 0;',
        'let usage_perc = usage * 100;',
        'let sst_files_size_pretty = to_fixed(sst_files_size, 0);',
        'let max_allowed_space_usage_pretty = to_fixed(max_allowed_space_usage, 0);',
        'let usage_perc_pretty = to_fixed(usage_perc, 2);',
    ]
    expression = {
        'program': '\n'.join(program_lines),
        'checkExpression': 'usage > {}'.format(perc_threshold / 100.0),
    }
    alert_config = {
        'id': service_id,
        'name': 'Total size of sst files in YP-{} more than {}% of limit'.format(yp_cluster.upper(), perc_threshold),
        'type': {'expression': expression},
        'periodMillis': milliseconds(seconds=120),
        'delaySeconds': 30,
        'annotations': {
            'usage': '{{expression.usage_perc_pretty}}%',
            'value_bytes': '{{expression.sst_files_size_pretty}}',
            'limit_bytes': '{{expression.max_allowed_space_usage_pretty}}',
        },
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    notification_level = ctype_config.get('yp_cluster_notification_levels', {}).get(yp_cluster, CRIT)
    flaps_config = juggler_sdk.FlapOptions(stable=180, critical=540, boost=0)
    juggler_checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=service_id,
        notification_level=notification_level,
        flaps_config=flaps_config,
    )
    return solomon_alert, juggler_checks


def get_latency_500ms_alert(ctype_config):
    """Build the per-host latency alert (pdns 'latency' sensor) and its Juggler checks.

    The threshold is 500 * 1000 in sensor units (AT_LEAST_ONE over 60 s, GT).
    NOTE(review): the sensor is presumably reported in microseconds — confirm.

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    threshold_ms = 500
    service_id = 'latency_{}ms'.format(threshold_ms)

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'latency'),
        'timeAggregation': 'AT_LEAST_ONE',
        'predicate': 'GT',
        'threshold': threshold_ms * 1000,  # 500 ms
    }
    alert_config = {
        'id': service_id,
        'name': 'Average latency > {}ms'.format(threshold_ms),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_udp_queries_number_alert(ctype_config):
    """Build the per-host UDP queries alert (pdns 'udp-queries' sensor) and its Juggler checks.

    The threshold is 25000 * HOSTS_PER_DC (AT_LEAST_ONE over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'udp-queries'
    queries_limit = 25000 * HOSTS_PER_DC

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'udp-queries'),
        'timeAggregation': 'AT_LEAST_ONE',
        'predicate': 'GT',
        'threshold': queries_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'UDP queries number > {}K'.format(int(queries_limit / 1000)),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_tcp_queries_number_alert(ctype_config):
    """Build the per-host TCP queries alert (pdns 'tcp-queries' sensor) and its Juggler checks.

    The threshold is 500 * HOSTS_PER_DC (AT_LEAST_ONE over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'tcp-queries'
    queries_limit = 500 * HOSTS_PER_DC

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'tcp-queries'),
        'timeAggregation': 'AT_LEAST_ONE',
        'predicate': 'GT',
        'threshold': queries_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'TCP queries number > {}K'.format(int(queries_limit / 1000)),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_timedout_packets_alert(ctype_config):
    """Build the per-host timed-out packets alert (pdns 'timedout-packets' sensor) and its Juggler checks.

    The threshold is 500 (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'timedout-packets'
    packets_limit = 500

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'timedout-packets'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': packets_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Timed out packets number > {}'.format(packets_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_servfail_packets_alert(ctype_config):
    """Build the per-host SERVFAIL packets alert (pdns 'servfail-packets' sensor) and its Juggler checks.

    The threshold is 5000 (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'servfail-packets'
    packets_limit = 5000

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'servfail-packets'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': packets_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'SERVFAIL packets number > {}'.format(packets_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_corrupt_packets_alert(ctype_config):
    """Build the per-host corrupted packets alert (pdns 'corrupt-packets' sensor) and its Juggler checks.

    The threshold is 5000 (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'corrupt-packets'
    packets_limit = 5000

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'corrupt-packets'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': packets_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Corrupted packets number > {}'.format(packets_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_queue_size_alert(ctype_config):
    """Build the per-host packet queue size alert (pdns 'qsize-q' sensor) and its Juggler checks.

    The threshold is 200 * HOSTS_PER_DC (AVG over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'queue-size'
    queue_limit = 200 * HOSTS_PER_DC

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'qsize-q'),
        'timeAggregation': 'AVG',
        'predicate': 'GT',
        'threshold': queue_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Packets waiting queue size > {}'.format(queue_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_overload_drops_alert(ctype_config):
    """Build the per-host overload drops alert (pdns 'overload-drops' sensor) and its Juggler checks.

    The threshold is 100 * HOSTS_PER_DC (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'overload-drops'
    drops_limit = 100 * HOSTS_PER_DC

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'overload-drops'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': drops_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Number of questions dropped because backends overloaded > {}'.format(drops_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_udp_noport_errors_alert(ctype_config):
    """Build the per-host UDP "no port" errors alert (pdns 'udp-noport-errors' sensor) and its Juggler checks.

    The threshold is 500 (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'udp-noport-errors'
    errors_limit = 500

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'udp-noport-errors'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': errors_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Number of UDP packets where an ICMP response was received that the remote port was not listening > {}'.format(errors_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_udp_in_errors_alert(ctype_config):
    """Build the per-host UDP input errors alert (pdns 'udp-in-errors' sensor) and its Juggler checks.

    The threshold is 500 (SUM over 60 s, GT).

    Returns:
        (solomon_alert, juggler_checks) tuple.
    """
    service_id = 'udp-in-errors'
    errors_limit = 500

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'udp-in-errors'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': errors_limit,
    }
    alert_config = {
        'id': service_id,
        'name': 'Number of packets, received faster than the OS could process them > {}'.format(errors_limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(alert_config, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=service_id)
    return solomon_alert, checks


def get_udp_recvbuf_errors_alert(ctype_config):
    """Build the 'udp-recvbuf-errors' Solomon alert and its Juggler checks.

    The alert is a threshold alert grouped by the 'host' label, configured
    with timeAggregation SUM and predicate GT against a threshold of 500 on
    the pdns 'udp-recvbuf-errors' sensor.

    :param ctype_config: ctype configuration, forwarded to get_pdns_sensor,
        patch_config and get_juggler_checks.
    :return: tuple (solomon_alert, juggler_checks).
    """
    alert_id = 'udp-recvbuf-errors'
    limit = 500

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'udp-recvbuf-errors'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': limit,
    }
    raw_alert = {
        'id': alert_id,
        'name': 'Number of errors caused in the UDP receive buffer > {}'.format(limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(raw_alert, ctype_config)

    # The Juggler service name is the same identifier as the Solomon alert id.
    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=alert_id)
    return solomon_alert, checks


def get_udp_sndbuf_errors_alert(ctype_config):
    """Build the 'udp-sndbuf-errors' Solomon alert and its Juggler checks.

    The alert is a threshold alert grouped by the 'host' label, configured
    with timeAggregation SUM and predicate GT against a threshold of 500 on
    the pdns 'udp-sndbuf-errors' sensor.

    :param ctype_config: ctype configuration, forwarded to get_pdns_sensor,
        patch_config and get_juggler_checks.
    :return: tuple (solomon_alert, juggler_checks).
    """
    alert_id = 'udp-sndbuf-errors'
    limit = 500

    threshold_rule = {
        'selectors': get_pdns_sensor(ctype_config, 'udp-sndbuf-errors'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': limit,
    }
    raw_alert = {
        'id': alert_id,
        'name': 'Number of errors caused in the UDP send buffer > {}'.format(limit),
        'groupByLabels': ['host'],
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(raw_alert, ctype_config)

    # The Juggler service name is the same identifier as the Solomon alert id.
    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=alert_id)
    return solomon_alert, checks


def get_empty_responses_alert(ctype_config, zone, record_type):
    """Build the empty-responses ratio alert for one (zone, record type) pair.

    The alert is an expression alert grouped by the 'host' label. Its program
    divides the 'empty_responses' backend sensor by the 'requests' backend
    sensor, and the check expression compares the last ratio with the
    configured percentage (converted to a fraction) while also requiring the
    last 'requests' value to be greater than 10.

    :param ctype_config: ctype configuration, forwarded to the sensor,
        patch_config and get_juggler_checks helpers.
    :param zone: zone name; must be a key of ZONES.
    :param record_type: record type; its 'empty_perc_threshold' is looked up
        under ZONES[zone] and defaults to 95 when absent.
    :return: tuple (solomon_alert, juggler_checks).
    """
    alert_id = 'empty-responses-{}-{}'.format(zone, record_type)
    record_params = ZONES[zone].get(record_type, {})
    percent_limit = record_params.get('empty_perc_threshold', 95)

    raw_requests = get_backend_sensor(ctype_config, 'requests', replace_nan=1, zone=zone, packet_record_type=record_type, record_type='ANY')
    # 'requests' is combined with constant_line(1) via group_lines('max', ...);
    # presumably this keeps the divisor in the program below from being zero.
    requests_expr = group_lines('max', raw_requests, 'constant_line(1)')
    empty_expr = get_backend_sensor(ctype_config, 'empty_responses', replace_nan=0, zone=zone, packet_record_type=record_type, record_type='ANY')

    program_lines = [
        'let requests = {};'.format(requests_expr),
        'let empty = {};'.format(empty_expr),
        'let empty_perc = empty / requests;',
    ]
    # The threshold is configured in percent, the expression works with a fraction.
    check_expression = 'last(empty_perc) > {} && last(requests) > 10'.format(percent_limit / 100.0)

    raw_alert = {
        'id': alert_id,
        'name': 'Empty responses percentage for {}:{} > {}%'.format(zone, record_type, percent_limit),
        'groupByLabels': ['host'],
        'type': {
            'expression': {
                'program': '\n'.join(program_lines),
                'checkExpression': check_expression,
            },
        },
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 30,
    }
    solomon_alert = patch_config(raw_alert, ctype_config)

    flap_options = juggler_sdk.FlapOptions(stable=60, critical=180, boost=0)
    checks = get_juggler_checks(
        get_juggler_check,
        ctype_config,
        solomon_alert,
        service=alert_id,
        notification_level=WARN,
        flaps_config=flap_options,
    )
    return solomon_alert, checks


def get_start_failed_alert(ctype_config):
    """Build the 'yp-dns-start-failed' Solomon alert and its Juggler checks.

    The alert is a threshold alert on the 'yp_dns_service.start.failed'
    sensor, configured with timeAggregation SUM and predicate GT against a
    threshold of 0. No 'groupByLabels' are set on this alert.

    :param ctype_config: ctype configuration, forwarded to get_sensor,
        patch_config and get_juggler_checks.
    :return: tuple (solomon_alert, juggler_checks).
    """
    alert_id = 'yp-dns-start-failed'

    threshold_rule = {
        'selectors': get_sensor(ctype_config, 'yp_dns_service.start.failed'),
        'timeAggregation': 'SUM',
        'predicate': 'GT',
        'threshold': 0,
    }
    raw_alert = {
        'id': alert_id,
        'name': 'YP DNS module start failed',
        'type': {'threshold': threshold_rule},
        'periodMillis': milliseconds(seconds=60),
        'delaySeconds': 0,
    }
    solomon_alert = patch_config(raw_alert, ctype_config)

    checks = get_juggler_checks(get_juggler_check, ctype_config, solomon_alert, service=alert_id)
    return solomon_alert, checks


def get_solomon_lag_check():
    """Build the Juggler aggregate check 'solomon:yp.dns.solomon_lag'.

    The check lives in JUGGLER_NAMESPACE, uses the 'logic_or' aggregator and
    has a single HOST child: service SOLOMON_PROJECT_ID on host
    'solomon_alerting_project_lag'.

    :return: juggler_sdk.Check instance.
    """
    lag_child = juggler_sdk.Child(
        host='solomon_alerting_project_lag',
        service=SOLOMON_PROJECT_ID,
        group_type='HOST',
        instance='',
    )
    check_params = dict(
        host='solomon',
        service='yp.dns.solomon_lag',
        namespace=JUGGLER_NAMESPACE,
        aggregator='logic_or',
        children=[lag_child],
    )
    return juggler_sdk.Check(**check_params)


# Alert builders based on pdns sensors. Each entry is a function taking only
# ctype_config; the list is part of the 'default' alerts of the auth-yp-dns
# ctype in CTYPE_CONFIGS.
PDNS_ALERTS = [
    get_latency_500ms_alert,
    get_udp_queries_number_alert,
    get_tcp_queries_number_alert,
    get_timedout_packets_alert,
    get_servfail_packets_alert,
    get_corrupt_packets_alert,
    get_queue_size_alert,
    get_overload_drops_alert,
    get_udp_noport_errors_alert,
    get_udp_in_errors_alert,
    get_udp_recvbuf_errors_alert,
    get_udp_sndbuf_errors_alert,
]

# Backend alert builders: one empty-responses alert per (zone, record type)
# pair configured in ZONES. functools.partial binds zone and record_type so
# every entry can be called with ctype_config only, like the other builders.
BACKEND_ALERTS = [
    functools.partial(get_empty_responses_alert, zone=zone, record_type=record_type)
    for zone, cfg in ZONES.items()
    for record_type in cfg
]

# Alert builders that are instantiated once per YP cluster: update_alerts_for_ctype
# calls each of them as func(ctype_config, yp_cluster) for every key of YP_CLUSTERS.
ALERTS_FOR_YP_CLUSTER = [
    get_storage_age_alert,
    get_max_objects_reached_alert,
    get_max_allowed_storage_space_reached_alert,
    get_compactions_exceeds_space_limit_alert,
    get_objects_number_limit_usage_alert,
    get_sst_files_size_limit_usage_alert,
]

# 'default' alert builders of the ns-cache and dns-cache ctypes (both use the
# 'unbound' global prefix in CTYPE_CONFIGS). Each entry takes only ctype_config.
UNBOUND_ALERTS = [
    get_start_failed_alert,
]

# Datacenters of the auth-yp-dns ctype. Used in CTYPE_CONFIGS both as the
# ctype's 'datacenters' value and to derive the '<dc>_yp_dns' nanny service names.
AUTH_YP_DNS_DATACENTERS = [
    'sas',
    'man',
    'vla',
    'msk',
]

# Datacenters of the dns-cache ctype. Used in CTYPE_CONFIGS as the ctype's
# 'datacenters' value and as keys of its nanny 'dc_to_service' mapping.
# NOTE(review): names are capitalized here, unlike AUTH_YP_DNS_DATACENTERS --
# presumably the consumers expect this spelling; confirm before normalizing.
DNS_CACHE_DATACENTERS = [
    'Sas',
    'Man',
    'Vla',
]

# Per-ctype configuration, keyed by the CTYPE_* constants.
# Keys read by the update_* functions at the end of this file:
#   'alerts' -> 'default':       builders called as func(ctype_config);
#   'alerts' -> 'by_yp_cluster': builders called as func(ctype_config, yp_cluster)
#                                for every key of YP_CLUSTERS;
#   'juggler_params' -> 'mark':  passed as `mark` to juggler_sdk.JugglerApi.
# The remaining keys are presumably consumed by the alert/check builder helpers
# defined elsewhere in this file -- check those helpers before changing them.
CTYPE_CONFIGS = {
    # Authoritative YP DNS.
    CTYPE_AUTH: {
        'solomon': {
            'cluster': 'prod',
            'service': 'main',
            'global_prefix': 'yp_dns',
        },
        'yp_replica': {
            'sensor_prefix': 'yp_replica',
        },
        'solomon_alert_params': {
            'description_prefix': '[ns{1-2}.yp-dns] ',
            # NOTE(review): this pattern lists 'myt' while AUTH_YP_DNS_DATACENTERS
            # lists 'msk' -- confirm the difference is intended.
            'all_hosts_label_value': 'sas?*|man?*|vla?*|myt?*',
        },
        'juggler_params': {
            'mark': 'yp-dns-alerts',
            'host_base': 'yp-dns',
            'urls': {
                'nanny_service': {
                    # One nanny service per datacenter, named '<dc>_yp_dns'.
                    'dc_to_service': {dc: '{}_yp_dns'.format(dc) for dc in AUTH_YP_DNS_DATACENTERS},
                },
            },
        },
        'alerts': {
            'default': PDNS_ALERTS + BACKEND_ALERTS,
            'by_yp_cluster': ALERTS_FOR_YP_CLUSTER,
        },
        # Only this ctype defines per-YP-cluster notification levels.
        'yp_cluster_notification_levels': {
            'sas-test': WARN,
            'man-pre': WARN,
        },
        'notifications': {
            'default': {
                'logins': {
                    WARN: [
                        '@svc_yp_dns:yp-dns-duty',
                        'dns-monitoring',
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'dns-monitoring',
                    ],
                },
                'escalations': {
                    WARN: [
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'elshiko',
                        'avitella',
                    ],
                },
            },
        },
        'datacenters': AUTH_YP_DNS_DATACENTERS,
        # Only this ctype has a 'warden' section.
        'warden': {
            'enable': True,
            'disable_filters': {
                'yp_cluster': {
                    'sas-test': True,
                    'man-pre': True,
                },
            },
            'component': {
                'name': 'dns',
            },
            'service': {
                'name': 'yp-dns',
                'title_name': 'YP DNS',
            },
        },
    },
    # ns-cache (sensors use the 'unbound' global prefix).
    CTYPE_NS_CACHE: {
        'solomon': {
            'cluster': 'ns-cache',
            'service': 'main',
            'global_prefix': 'unbound',
        },
        'yp_replica': {
            'sensor_prefix': 'yp_replica',
        },
        'solomon_alert_params': {
            'id_suffix': '-ns-cache',
            'description_prefix': '[ns-cache] ',
            'all_hosts_label_value': 'ns*',
        },
        'juggler_params': {
            'mark': 'ns-cache-dns-alerts',
            'host_base': 'ns-cache',
            # No URLs are configured for this ctype.
            'urls': {
            },
        },
        'alerts': {
            'default': UNBOUND_ALERTS,
            'by_yp_cluster': ALERTS_FOR_YP_CLUSTER,
        },
        'notifications': {
            'default': {
                'logins': {
                    WARN: [
                        '@svc_yp_dns:yp-dns-duty',
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'dns-monitoring',
                    ],
                },
                'escalations': {
                    WARN: [
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'slayer',
                        'elshiko',
                        'avitella',
                    ],
                },
            },
        },
        # Inline list: no shared constant exists for the ns-cache datacenters.
        'datacenters': [
            'Sas',
            'Man',
            'Vla',
            'Iva',
            'Myt',
        ],
    },
    # dns-cache (sensors use the 'unbound' global prefix).
    CTYPE_DNS_CACHE: {
        'solomon': {
            'cluster': 'dns-cache',
            'service': 'main',
            'global_prefix': 'unbound',
        },
        'yp_replica': {
            'sensor_prefix': 'yp_replica',
        },
        'solomon_alert_params': {
            'id_suffix': '-dns-cache',
            'description_prefix': '[dns-cache] ',
            'all_hosts_label_value': 'sas?*|man?*|vla?*',
        },
        'juggler_params': {
            'mark': 'dns-cache-dns-alerts',
            'host_base': 'dns-cache',
            'urls': {
                'nanny_service': {
                    # Every datacenter maps to the same nanny service.
                    'dc_to_service': {dc: 'production_dns_cache' for dc in DNS_CACHE_DATACENTERS},
                    'is_adm': True,
                },
            },
        },
        'alerts': {
            'default': UNBOUND_ALERTS,
            'by_yp_cluster': ALERTS_FOR_YP_CLUSTER,
        },
        'notifications': {
            'default': {
                'logins': {
                    WARN: [
                        '@svc_yp_dns:yp-dns-duty',
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'dns-monitoring',
                    ],
                },
                'escalations': {
                    WARN: [
                    ],
                    CRIT: [
                        '@svc_yp_dns:yp-dns-duty',
                        'slayer',
                        'elshiko',
                        'avitella',
                    ],
                },
            },
        },
        'datacenters': DNS_CACHE_DATACENTERS,
    },
}


def update_juggler_check(juggler_client, check, force=False):
    """Apply one Juggler change: remove checks by filter, or upsert a check.

    :param juggler_client: Juggler API client providing remove_checks() and
        upsert_check().
    :param check: a juggler_sdk.CheckFilter selects checks to remove; any
        other object is upserted as a check.
    :param force: forwarded to upsert_check(); not used for removals.
    """
    if isinstance(check, juggler_sdk.CheckFilter):
        result = juggler_client.remove_checks([check]).results[0]
        # A failed removal is reported at ERROR level so that it is not lost
        # among the routine INFO messages; the message text is unchanged.
        log = logging.info if result.success else logging.error
        log(
            'Removed juggler check %s:%s: %s',
            check.host,
            check.service,
            'Success' if result.success else 'Failed',
        )
        return

    diff = juggler_client.upsert_check(check, force=force).diff.to_dict()
    # Identify the check as host:service, the same way the removal branch does
    # (the service used to be printed twice here).
    logging.info('Updated juggler check %s:%s', check.host, check.service)
    logging.info('Diff:\n%s', json.dumps(diff, indent=2))


def update_alert(solomon_client, juggler_client, solomon_config, juggler_checks):
    """Upload one Solomon alert and apply the Juggler checks that belong to it.

    :param solomon_client: client whose put_item() stores the alert.
    :param juggler_client: Juggler API client forwarded to update_juggler_check.
    :param solomon_config: alert config dict; its 'id' is used as the item id.
    :param juggler_checks: None (nothing to do on the Juggler side), a single
        check, or a list of checks.
    """
    solomon_client.put_item('alerts', solomon_config['id'], solomon_config)

    if juggler_checks is None:
        return

    # A single check is accepted as well as a list of checks.
    checks = juggler_checks if isinstance(juggler_checks, list) else [juggler_checks]
    for check in checks:
        update_juggler_check(juggler_client, check)


def update_alerts_for_ctype(ctype_config, solomon_client, juggler_client):
    """Create or update every alert configured for one ctype.

    Per-YP-cluster builders run first, once for every key of YP_CLUSTERS,
    followed by the ctype's 'default' builders. Each builder result is
    unpacked into the remaining arguments of update_alert.

    :param ctype_config: one value of CTYPE_CONFIGS.
    :param solomon_client: Solomon client forwarded to update_alert.
    :param juggler_client: Juggler client forwarded to update_alert.
    """
    alert_builders = ctype_config['alerts']

    per_cluster_builders = alert_builders.get('by_yp_cluster', [])
    for cluster in YP_CLUSTERS:
        for build_alert in per_cluster_builders:
            update_alert(solomon_client, juggler_client, *build_alert(ctype_config, cluster))

    for build_alert in alert_builders.get('default', []):
        update_alert(solomon_client, juggler_client, *build_alert(ctype_config))


def update_alerts(args):
    """Update the Solomon lag check and all alerts of the requested ctypes.

    :param args: parsed command-line arguments; reads solomon_token,
        juggler_token and ctype (an iterable of CTYPE_CONFIGS keys).
    """
    juggler_api_url = 'http://juggler-api.search.yandex.net'
    solomon_client = SolomonApi(SOLOMON_PROJECT_ID, args.solomon_token)

    # The lag check is upserted through a client created without a mark and
    # with force=True.
    lag_client = juggler_sdk.JugglerApi(juggler_api_url, oauth_token=args.juggler_token)
    update_juggler_check(lag_client, get_solomon_lag_check(), force=True)

    for ctype in args.ctype:
        ctype_config = CTYPE_CONFIGS[ctype]
        # Every ctype gets its own client, created with the ctype's mark and
        # used as a context manager for the duration of the update.
        ctype_mark = ctype_config['juggler_params'].get('mark')
        with juggler_sdk.JugglerApi(juggler_api_url, oauth_token=args.juggler_token, mark=ctype_mark) as marked_client:
            update_alerts_for_ctype(ctype_config, solomon_client, marked_client)


def parse_args(argv):
    """Parse the command-line arguments of the script.

    :param argv: list of argument strings, without the program name.
    :return: argparse.Namespace with ctype (list, defaults to all CTYPES),
        solomon_token, juggler_token and verbose.
    """
    option_specs = (
        ('--ctype', dict(nargs='+', required=False, choices=CTYPES, default=CTYPES)),
        ('--solomon-token', dict(required=True, help='solomon token. https://solomon.yandex-team.ru/api/internal/auth')),
        ('--juggler-token', dict(required=True, help='juggler token. https://juggler.yandex-team.ru/')),
        ('--verbose', dict(action='store_true', help='Enable verbose mode')),
    )

    parser = argparse.ArgumentParser()
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args(argv)


def main(argv):
    """Parse arguments, configure logging and update all alerts.

    :param argv: list of command-line argument strings, without the program name.
    """
    args = parse_args(argv)

    # --verbose switches the root logger from INFO to DEBUG.
    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    update_alerts(args)


# Script entry point: pass the command-line arguments (without the program
# name) to main() when the file is executed directly.
if __name__ == '__main__':
    main(sys.argv[1:])
