#!/usr/bin/env python2

import argparse
import json
import copy


def make_alert_impl(**kwargs):
    """Build one alert definition dict from keyword arguments.

    Required keywords (a missing one raises ``KeyError``):
      * ``name``, ``tags``, ``signal``, ``crit``, ``warn`` -- copied verbatim
        into the alert.

    Optional keywords:
      * ``value_modify`` -- copied into the alert when truthy.
      * ``notifications`` -- iterable of ``(method, logins, custom_args)``
        tuples; when at least one is given a ``juggler_check`` section is
        attached to the alert.
      * ``flaps`` -- copied into the ``juggler_check`` section when truthy
        (so ``None`` and ``{}`` both leave it out).

    Returns:
        The alert as a plain dict.
    """
    alert = {}
    alert["name"] = kwargs["name"]
    alert["tags"] = kwargs["tags"]
    alert["signal"] = kwargs["signal"]
    alert["crit"] = kwargs["crit"]
    alert["warn"] = kwargs["warn"]
    alert["mgroups"] = ["ASEARCH"]
    # The key used to be misspelled as "decription", so the description was
    # never emitted under its intended name.
    alert["description"] = alert["name"]

    if kwargs.get("value_modify"):
        alert["value_modify"] = kwargs["value_modify"]

    notifications = []
    for method, logins, custom_args in kwargs.get("notifications", []):
        template_kwargs = {
            "status": "CRIT",
            "method": [method],
            "login": logins,
        }
        # Per-notification extras may override the defaults above.
        template_kwargs.update(custom_args)
        notifications.append({
            "template_name": "on_status_change",
            "template_kwargs": template_kwargs,
        })

    # Without notifications no juggler check is attached at all.
    if notifications:
        juggler_check = {}
        juggler_check["namespace"] = "l7"
        juggler_check["host"] = alert["name"]
        juggler_check["service"] = "L7.yandex.ru"
        if kwargs.get("flaps"):
            juggler_check["flaps"] = kwargs["flaps"]

        juggler_check["notifications"] = notifications

        juggler_check["aggregator"] = "logic_or"
        juggler_check["aggregator_kwargs"] = {
            "unreach_service": [{"check": "yasm_alert:virtual-meta"}],
            "unreach_mode": "force_ok",
            "nodata_mode": "force_ok",
        }

        alert["juggler_check"] = juggler_check

    return alert


def make_alert(name, prj, geo, signal, crit, warn, notifications, flaps, value_modify):
    """Build an alert tagged for the balancer ``prj`` project in location ``geo``.

    The tag set is fixed apart from ``prj`` and ``geo``; every other argument
    is handed to ``make_alert_impl`` unchanged.
    """
    balancer_tags = {
        "itype": "balancer",
        "ctype": "prod,prestable,experiment",
        "prj": prj,
        "geo": geo,
    }
    return make_alert_impl(
        name=name, tags=balancer_tags,
        signal=signal, crit=crit, warn=warn,
        notifications=notifications, flaps=flaps, value_modify=value_modify
    )


# First group of "prj" tag values; within this file it is only used to build
# PRJ below.
TERM_PRJ = [
    "l7-balancer-term",
    "l7-balancer-yaru",
    "l7-balancer-exp-term",
    "l7-geo-only",
    "any",
    "l7-proxy",
]
# Second group of "prj" tag values; besides PRJ it also feeds NO_ANTIROBOT and
# the laas/uaas filters in custom_alert().
SERVICE_PRJ = [
    "l7-balancer-knoss-search",
    "l7-balancer-knoss-morda",
    "l7-balancer-knoss-fast",
]

# Every project that make_self_alerts() iterates over; also the default section
# list rendered into the self-alerts dashboard template.
PRJ = TERM_PRJ + SERVICE_PRJ

# Locations iterated for every project; also the fallback for projects that
# have no entry in LOCATIONS.
GEO = ["sas", "man", "vla"]


class Env:
    """Plain holder for the project/location pair an alert is generated for."""

    def __init__(self, prj, geo, section=None):
        self.prj, self.geo = prj, geo
        self.section = section
        # A fresh list per instance, initially empty.
        self.notifications = []


def self_alert(name, env, signal, warn, crit, notifications, flaps, value_modify):
    """Build a "self" alert for ``env``.

    The full alert name is ``L7_alerts.self.<prj>.<geo>.<name>``; project and
    location come from ``env`` and the rest is passed on to ``make_alert``.
    """
    full_name = "L7_alerts.self.{}.{}.{}".format(env.prj, env.geo, name)
    return make_alert(
        full_name, env.prj, env.geo,
        signal=signal, crit=crit, warn=warn,
        notifications=notifications, flaps=flaps, value_modify=value_modify
    )


# Notification specs in the (method, logins, custom_args) shape that
# make_alert_impl() unpacks. "repeat" is 5 * 60 -- presumably seconds, TODO confirm.
# The first is the default used by make_self_alerts(); the second replaces it
# for the "l7-balancer-exp-term" project in custom_alert().
L7_ALERTS_TELEGRAMM = ("telegram", ["l7-alerts", ], {"repeat": 5 * 60})
L7_ALERTS_EXP_TELEGRAMM = ("telegram", ["l7-alerts-exp", ], {"repeat": 5 * 60})


# Templates for the per-project / per-location "self" alerts, keyed by the
# short alert name that make_self_alerts() turns into
# "L7_alerts.self.<prj>.<geo>.<name>". Each entry provides:
#   "signal"       -- signal expression string, passed through to the alert;
#   "warn", "crit" -- two-element threshold lists (a None second element in
#                     "crit" presumably means "no upper bound" -- TODO confirm);
#   "flaps"        -- optional; when the key is absent custom_alert() fills in
#                     a default, whereas an explicit {} is falsy and therefore
#                     makes make_alert_impl() omit flaps from the juggler check;
#   "value_modify" -- optional; copied into the alert when present.
# custom_alert() may drop an entry or override its thresholds per project/location.
self_alerts_template = {
    "backend_fails": {
        "signal": "perc(sum(balancer_report-report-service_total-backend_fail_summ, balancer_report-report-service_total-other_fail_summ), balancer_report-report-service_total-requests_summ)",
        "warn": [0.5, 10],
        "crit": [10, None],
    },
    "client_fails": {
        "signal": "perc(balancer_report-report-service_total-client_fail_summ, balancer_report-report-service_total-requests_summ)",
        "warn": [5, 10],
        "crit": [10, None],
    },
    "mem_guarantee": {
        "signal": "perc(portoinst-anon_usage_gb_tmmv, portoinst-memory_guarantee_gb_tmmv)",
        "warn": [30, 50],
        "crit": [50, None],
    },
    "mem_guarantee_quant": {
        "signal": "quant(portoinst-memory_anon_unevict_limit_usage_perc_hgram, 99)",
        "warn": [30, 50],
        "crit": [50, None],
    },
    "cpu_usage": {
        "signal": "quant(portoinst-cpu_guarantee_usage_perc_hgram, 50)",
        "warn": [10, 20],
        "crit": [20, None],
    },
    "cpu_usage_quant": {
        "signal": "quant(portoinst-cpu_guarantee_usage_perc_hgram, 99)",
        "warn": [30, 40],
        "crit": [40, None],
    },
    "cpu_throttled": {
        "signal": "portoinst-cpu_throttled_cores_tmmv",
        "warn": [0.01, 0.01],
        "crit": [0.01, None],
    },
    "worker_cpu_q95": {
        "signal": "quant(balancer_report-worker-cpu_usage_hgram, 95)",
        "warn": [15, 25],
        "crit": [25, None],
    },
    "host_cpu": {
        "signal": "diff(100, havg(cpu-id_hgram))",
        "warn": [20, 30],
        "crit": [30, None],
    },
    "worker_desync_perc": {
        "signal": "perc(balancer_report-worker-cpu_usage_time_desync_summ, balancer_report-worker-conts_cycle_summ)",
        "warn": [0.005, 0.1],
        "crit": [0.1, None],
        "flaps": {"stable": 30, "critical": 120},
    },
    "maxconn": {
        "signal": "perc(balancer_report-worker-tcp_conns_ammv, balancer_report-worker-tcp_max_conns_ammv)",
        "warn": [10, 30],
        "crit": [30, None],
    },
    "crashes_total": {
        "signal": "messages-crashes_total_summ",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {}
    },
    "crashes_not_9": {
        "signal": "max(diff(messages-crashes_total_summ, messages-crashes_sig9_summ), sum(messages-crashes_sig6_summ, messages-crashes_sig11_summ))",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {},
        "value_modify": {
            "type": "max",
            "window": 180,
        }
    },
    "host_conntrack": {
        "signal": "perc(netstat-nf_conntrack_count_max, netstat-nf_conntrack_max)",
        "warn": [10, 30],
        "crit": [30, None],
    },
    "host_fd": {
        "signal": "perc(quant(fdstat-fd_used_hgram, 99), quant(fdstat-fd_available_hgram, 1))",
        "warn": [15, 25],
        "crit": [25, None],
    },
    "retransmits": {
        "signal": "div(portoinst-net_retrans_segs_tmmv, mul(500, instances-counter_tmmm))",
        "warn": [25, 50],
        "crit": [50, None],
        "flaps": {"stable": 300, "critical": 1500},
        "value_modify": {
            "type": "aver",
            "window": 300,
        }
    },
    "retransmits_perc": {
        "signal": "perc(portoinst-net_retrans_segs_tmmv, portoinst-net_iface_veth_tx_packets_tmmv)",
        "warn": [5, 10],
        "crit": [10, None],
        "flaps": {"stable": 300, "critical": 1500},
        "value_modify": {
            "type": "aver",
            "window": 300,
        }
    },
    "network_out": {
        "signal": "quant(portoinst-net_tx_utilization_hgram, max)",
        "warn": [50, 75],
        "crit": [75, None],
    },
    "network_in": {
        "signal": "quant(portoinst-net_rx_utilization_hgram, max)",
        "warn": [50, 75],
        "crit": [75, None],
    },
    "antirobot_non_200": {
        "signal": "perc(diff(balancer_report-report-antirobot-requests_summ, balancer_report-report-antirobot-outgoing_2xx_summ), balancer_report-report-antirobot-requests_summ)",
        "warn": [10, 50],
        "crit": [50, None],
    },
    "cpu_limiter": {
        "signal": "sum(balancer_report-worker-cpu_limiter_conn_rejected_summ, balancer_report-worker-cpu_limiter_http2_disabled_summ, balancer_report-worker-cpu_limiter_http2_closed_summ, balancer_report-worker-cpu_limiter_keepalive_closed_summ)",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {},
    },
    "checker_cache_miss": {
        "signal": "balancer_report-worker-cpu_limiter_checker_cache_miss_summ",
        "warn": [100, 100],
        "crit": [100, None],
        "flaps": {"stable": 1800, "critical": 5400},
    },
    "sd_fatal_errors": {
        "signal": "sum(balancer_report-sd-update_fail_summ,balancer_report-sd-update_loop_errors_summ, balancer_report-sd-cache_store_errors_summ, balancer_report-sd-cache_load_errors_summ, balancer_report-sd-update_errors_summ, balancer_report-sd-endpointset_update_errors_summ, balancer_report-sd-requester_errors_summ, balancer_report-sd-log_write_errors_summ)",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {},
    },
    "sd_remote_errors": {
        "signal": "perc(balancer_report-sd-remote_errors_summ, balancer_report-sd-requests_summ)",
        "warn": [0.1, 10],
        "crit": [10, None],
        "flaps": {"stable": 60, "critical": 240},
    },
    "sd_update_loop_overload": {
        "signal": "balancer_report-sd-update_loop_overload_summ",
        "warn": [1, 5],
        "crit": [5, None],
    },
    "exit_errors": {
        "signal": "sum(instancectl-balancer.exit_error_mmmm, instancectl-balancer.exit_signal_mmmm)",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {},
    },
    "fd_usage": {
        "signal": "perc(balancer_report-fd_size_ammv, balancer_report-no_file_limit_ammv)",
        "warn": [50, 75],
        "crit": [75, None],
    },
    "watchdog": {
        "signal": "balancer_report-threads-froze_ammv",
        "warn": [0.5, 0.5],
        "crit": [1, None],
        "flaps": {},
    },
    "dns_errors": {
        "signal": "sum(balancer_report-worker-dns_async_resolve_timeout_error_summ, balancer_report-worker-dns_async_empty_address_error_summ, balancer_report-worker-dns_async_resolve_general_error_summ, balancer_report-pinger-dns_async_resolve_timeout_error_summ, balancer_report-pinger-dns_async_empty_address_error_summ, balancer_report-pinger-dns_async_resolve_general_error_summ)",
        "warn": [1, 75],
        "crit": [75, None],
    },
    "antirobot_client_fails": {
        "signal": "perc(balancer_report-report-antirobot-client_fail_summ,balancer_report-report-antirobot-requests_summ)",
        "warn": [2, 10],
        "crit": [10, None],
    },
    "antirobot_non_client_fails": {
        "signal": "perc(diff(balancer_report-report-antirobot-fail_summ,balancer_report-report-antirobot-client_fail_summ),balancer_report-report-antirobot-requests_summ)",
        "warn": [1, 2],
        "crit": [2, None],
    },
    "laas_fails": {
        "signal": "perc(balancer_report-report-l7heavygeobasemodule-fail_summ, balancer_report-report-l7heavygeobasemodule-requests_summ)",
        "warn": [0.5, 1],
        "crit": [1, None],
    },
    "uaas_fails": {
        "signal": "perc(balancer_report-report-expgetter-fail_summ, balancer_report-report-expgetter-requests_summ)",
        "warn": [0.5, 1],
        "crit": [1, None],
    },
    "disk_logs_usage": {
        "signal": "portoinst-volume_/logs_usage_perc_txxx",
        "warn": [60, 75],
        "crit": [75, None],
    },
    "disk_cwd_usage": {
        "signal": "portoinst-volume_cwd_usage_perc_txxx",
        "warn": [60, 75],
        "crit": [75, None],
    },
    "disk_root_usage": {
        "signal": "portoinst-volume_root_usage_perc_txxx",
        "warn": [60, 75],
        "crit": [75, None],
    },
    "unistat_overflow": {
        "signal": "modules-unistat-filtered_signals_mmmm",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
        "flaps": {},
    },
    "cycles_stopped": {
        "signal": "balancer_report-http-cycles_stopped_summ",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
    },
    "cycle_header_length": {
        "signal": "balancer_report-http-cycle-header-length_summ",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
    },
    "cycle-header-format-error": {
        "signal": "balancer_report-http-cycle-header-format-error_summ",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
    },
    "banned_addresses": {
        "signal": "balancer_report-ban-addresses_ammv",
        "warn": [0.5, 0.5],
        "crit": [0.5, None],
    },
}

# "host_*" alerts that custom_alert() keeps for every project in ALL_HOSTS;
# the remaining "host_*" alerts are kept only for L7_HOSTS.
COMMON_HOST_ALERTS = [
    "host_fd",
]

# Projects for which custom_alert() drops the antirobot-related alerts.
NO_ANTIROBOT = SERVICE_PRJ + ["l7-proxy"]

# Project groups used by custom_alert() for host/crash alert filtering and for
# per-group threshold overrides.
L7_HOSTS = ['l7-balancer-term', 'l7-balancer-exp-term']
DYN_HOSTS = ['l7-balancer-knoss-search', 'l7-balancer-knoss-morda', 'l7-balancer-knoss-fast']
ALL_HOSTS = L7_HOSTS + DYN_HOSTS

# Per-project list of locations for which alerts are generated; projects not
# listed here fall back to GEO (see custom_alert()).
LOCATIONS = {
    'l7-balancer-exp-term': ['vla'],
    'l7-proxy': ['sas', 'man', 'vla'],
}


def custom_alert(env, name, alert):
    """Specialise the alert template ``alert`` (named ``name``) for ``env``.

    Returns ``None`` when the alert must not be generated for this
    project/location. Otherwise returns a deep copy of ``alert`` with
    per-project / per-location overrides applied; the copy always has a
    ``"flaps"`` key, and gets ``"notifications"`` for the exp-term project.
    The input template is never modified.

    NOTE(review): the "host_network_out", "host_network_in",
    "host_retransmits" and "host_retransmits_perc" overrides below match no key
    of self_alerts_template in this file, and "l7-balancer-yastat" is not in
    PRJ -- confirm whether these are leftovers or the names should be updated.
    """
    prj, geo = env.prj, env.geo

    antirobot_alerts = ("antirobot_non_200", "antirobot_client_fails",
                        "antirobot_non_client_fails", "banned_addresses")
    is_common_host_alert = name in COMMON_HOST_ALERTS

    # --- filters: any match means "no such alert for this environment" ---
    filtered_out = (
        (name in antirobot_alerts and prj in NO_ANTIROBOT)
        or (name in ("laas_fails", "uaas_fails")
            and prj not in (SERVICE_PRJ + ["l7-balancer-yaru"]))
        or (name == "sd_remote_errors" and prj in ("l7-balancer-yastat", "l7-proxy"))
        or (name == "crashes_total" and prj in DYN_HOSTS)
        or (name.startswith("host_") and not is_common_host_alert and prj not in L7_HOSTS)
        or (is_common_host_alert and prj not in ALL_HOSTS)
        or (name.startswith("crashes_") and prj not in ALL_HOSTS)
        or geo not in LOCATIONS.get(prj, GEO)
    )
    if filtered_out:
        return None

    if name == "antirobot_non_200" and prj in ("l7-balancer-yaru", "any"):
        return None

    # --- overrides are applied to a private copy of the template ---
    res = copy.deepcopy(alert)

    def set_levels(warn_from, crit_from):
        # Every override uses warn = [a, b] and crit = [b, None].
        res["warn"] = [warn_from, crit_from]
        res["crit"] = [crit_from, None]

    is_exp_term = prj == "l7-balancer-exp-term"
    on_l7_hosts = prj in L7_HOSTS
    on_dyn_hosts = prj in DYN_HOSTS
    yaru_in_man = prj == "l7-balancer-yaru" and geo == "man"

    if name == "banned_addresses":
        if prj == "l7-balancer-yaru":
            set_levels(0.5, 35000)
        if on_l7_hosts:
            set_levels(0.5, 101000)
        if prj == "any":
            set_levels(0.5, 4000)

    if is_exp_term:
        res["notifications"] = [L7_ALERTS_EXP_TELEGRAMM]

    # A template that already carries "flaps" (even an empty dict) keeps it.
    if "flaps" not in res:
        if is_exp_term:
            res["flaps"] = {"stable": 60, "critical": 5 * 60}
        else:
            res["flaps"] = {"stable": 25, "critical": 2 * 60}

    if name == "antirobot_non_200":
        if geo == "man":
            set_levels(40, 85)
    elif name in ("host_network_out", "host_network_in"):
        net_bandwith = {
            "man": 10000,
            "sas": 10000,
            "vla": 10000
        }
        if on_dyn_hosts:
            set_levels(30, 50)
        res["signal"] = res["signal"].format(net_bandwith[geo] * 5 * 1024 * 1024)
    elif name == "host_retransmits":
        if on_l7_hosts:
            set_levels(400, 800)
    elif name == "host_retransmits_perc":
        if on_l7_hosts:
            set_levels(1.0, 2.0)
    elif name == "cpu_usage":
        if on_dyn_hosts or yaru_in_man:
            set_levels(20, 35)
    elif name == "worker_cpu_q95":
        if yaru_in_man:
            set_levels(25, 55)
        # Applied last, so the DYN_HOSTS values win if both conditions hold.
        if on_dyn_hosts:
            set_levels(30, 35)
    elif name == "client_fails":
        if prj == "l7-proxy":
            set_levels(10, 20)

    return res


def make_self_alerts():
    """Generate the "self" alerts for every project/location pair.

    Projects are walked in ``PRJ`` order, locations in ``GEO`` order and
    templates in sorted-name order; templates rejected by ``custom_alert``
    are skipped. Alerts without their own notifications get the default
    telegram notification.
    """
    templates = sorted(self_alerts_template.items())
    environments = [Env(prj, geo) for prj in PRJ for geo in GEO]

    generated = []
    for env in environments:
        for name, template in templates:
            spec = custom_alert(env, name, template)
            if spec:
                generated.append(self_alert(
                    name, env,
                    spec["signal"], spec["warn"], spec["crit"],
                    spec.get("notifications", [L7_ALERTS_TELEGRAMM]),
                    spec["flaps"],
                    spec.get("value_modify"),
                ))
    return generated


# Templates for the "sd_test" alerts built by make_sd_test_alerts(). Entries use
# the same "signal"/"warn"/"crit"/"value_modify" keys as self_alerts_template.
# A name containing "{}" is expanded once per service: both the name and the
# signal are formatted with the service name.
sd_test_alerts_template = {
    "update_rate_{}": {
        "signal": "balancer_report-sd-{}-chaos-service-slave-update_summ",
        "warn": [0.1, 0.1],
        "crit": [0, 0.1],
        "value_modify": {
            "type": "aver",
            "window": 1200,
        }
    },
    "service_total_fail": {
        "signal": "balancer_report-report-service_total-fail_summ",
        "warn": [5, 5],
        "crit": [5, None],
    },
    "service_total_succ": {
        "signal": "balancer_report-report-service_total-succ_summ",
        "warn": [100, 200],
        "crit": [0, 100],
    },
}


def make_sd_global_alerts_template():
    """Return the templates for the global (not per-location) sd alerts.

    One template per plain sd counter, all sharing the same thresholds, plus
    a percentage-based ``remote_errors_perc`` template with its own flaps.
    """
    counters = (
        "update_fail", "store_errors", "load_error", "load_errors",
        "update_errors", "requester_errors", "log_write_errors",
        "update_loop_overload",
    )

    templates = {
        counter: {
            "signal": "balancer_report-sd-{}_summ".format(counter),
            "warn": [1, 5],
            "crit": [5, 100],
        }
        for counter in counters
    }

    templates["remote_errors_perc"] = {
        "signal": "perc(balancer_report-sd-remote_errors_summ, balancer_report-sd-requests_summ)",
        "warn": [0.1, 0.5],
        "crit": [0.5, None],
        "flaps": {"stable": 30, "critical": 5 * 30},
    }
    return templates


# Built once at import time; consumed by make_sd_test_alerts().
global_sd_alerts_template = make_sd_global_alerts_template()


def make_sd_test_alerts():
    """Generate the sd test alerts followed by the global sd alerts.

    Test alerts are produced per template (sorted by name), then per location
    in ``GEO``, then per expanded name; a template name containing ``{}`` is
    expanded for each service. Global alerts come last, sorted by name, and
    carry only the ``itype`` tag.
    """
    services = ['sas', 'sas-test', 'man', 'man-pre', 'vla', 'myt']
    alerts = []

    for template_name, template in sorted(sd_test_alerts_template.items()):
        raw_signal = template["signal"]
        if '{}' in template_name:
            variants = [
                (template_name.format(service), raw_signal.format(service))
                for service in services
            ]
        else:
            variants = [(template_name, raw_signal)]

        for geo in GEO:
            for alert_name, signal in variants:
                alerts.append(make_alert_impl(
                    name="L7_sd_test_alerts.sd_test.{}.{}".format(geo, alert_name),
                    tags={"itype": "balancer", "geo": geo, "ctype": "test", "prj": "sd-balancer-test"},
                    signal=signal,
                    crit=template["crit"],
                    warn=template["warn"],
                    notifications=template.get(
                        "notifications",
                        [("telegram", ["kulikov", ], {"repeat": 60 * 60})]),
                    flaps=template.get("flaps", {"stable": 25, "critical": 2 * 60}),
                    value_modify=template.get("value_modify"),
                ))

    for global_name, template in sorted(global_sd_alerts_template.items()):
        alerts.append(make_alert_impl(
            name="L7_sd_test_alerts.sd_global.all.{}".format(global_name),
            tags={"itype": "balancer"},
            signal=template["signal"],
            crit=template["crit"],
            warn=template["warn"],
            # No default here: templates without "flaps" pass None.
            flaps=template.get("flaps"),
            notifications=template.get(
                "notifications",
                [("telegram", ["kulikov", ], {"repeat": 5 * 60})]),
            value_modify=template.get("value_modify"),
        ))

    return alerts


# Dashboard panel template shared by make_L7_self_jinja() and
# make_L7_sd_test_jinja(). It is first run through str.format(), so "{{" / "}}"
# are escaped literal braces and {SECT}, {ALLSECT}, {NAMERE}, {SUFF} are the
# placeholders those functions fill in. The "<% %>" and "<< >>" markers are
# presumably the block/variable delimiters of the template engine that renders
# the result -- TODO confirm against the consumer.
Jinja = """
{{
  "type": "panel",
  "editors": [],
  "charts": [
    <% set main_layout = create_main_layout("vertical", default_height=1, default_width=1) %>
    <% set geo_title = main_layout.sub("horizontal") %>
    <% for geo in ('sas', 'man', 'vla') %>
    {{ "type": "text",
        "text": "<<geo>>",
        <<geo_title.coords(width=5,height=0.2)>>,
    }},
    <% endfor %>
    <% for section in {SECT}|default({ALLSECT}) %>
    {{ "type": "text",
        "text": "<<section>>",
        <<main_layout.coords(width=15,height=0.2)>>,

    }},
    <% set geo_layout = main_layout.sub("horizontal") %>
    <% for geo in ('sas', 'man', 'vla') %>
        <% set geo_alerts = list_alerts(name_pattern='{NAMERE}' + section + '\.' + geo + '\..+') %>
        <% set alerts_layout = geo_layout.sub("flow", columns=5) %>
        <% for alert in geo_alerts|sort(attribute='name') %>
            {{"type": "alert",
             "name": "<<alert>>",
             "title": "<<alert.name.split(".")[3:]|join(' ')|replace('_', ' ')>>",
             <<alerts_layout.coords(height=0.25)>>,
            }},
        <% endfor %>
        <% if geo_alerts|length < 5 %>
            <% for i in range(5 - geo_alerts|length) %>
                {{ "type": "text",
                   "text": "",
                   <<alerts_layout.coords(height=0.25)>>,
                }},
            <% endfor %>
        <% endif %>
   <% endfor %>
   <% endfor %>

{SUFF}

  ]
}}
"""

# Template fragment substituted for {SUFF} by make_L7_self_jinja(). It is
# inserted as a str.format() argument value, so its braces are NOT doubled.
external_alerts = """
    <% if not prj %>
        { "type": "text",
            "text": "external",
            <<main_layout.coords(width=15,height=0.2)>>,

        },
        <% set external_alerts = list_alerts(name_pattern='L7_external_alerts\..+') %>
        <% set external_alerts_layout = main_layout.sub("horizontal") %>
        <% for alert in external_alerts|sort(attribute='name') %>
            {"type": "alert",
             "name": "<<alert>>",
             "title": "<<alert.name.split(".")[1:]|join(' ')|replace('_', ' ')>>",
             <<external_alerts_layout.coords(height=0.25)>>,
            },
       <% endfor %>
   <% endif %>
"""

# Template fragment substituted for {SUFF} by make_L7_sd_test_jinja(). It is
# inserted as a str.format() argument value, so its braces are NOT doubled.
# NOTE(review): the first "." in the name_pattern below is not escaped, unlike
# the other dots -- confirm whether that is intentional.
global_sd_alerts = """
    <% if not prj %>
        { "type": "text",
            "text": "global sd alerts",
            <<main_layout.coords(width=15,height=0.2)>>,

        },
        <% set external_alerts = list_alerts(name_pattern='L7_sd_test_alerts.sd_global\.all\..+') %>
        <% set external_alerts_layout = main_layout.sub("horizontal") %>
        <% for alert in external_alerts|sort(attribute='name') %>
            {"type": "alert",
             "name": "<<alert>>",
             "title": "<<alert.name.split(".")[1:]|join(' ')|replace('_', ' ')>>",
             <<external_alerts_layout.coords(height=0.25)>>,
            },
       <% endfor %>
   <% endif %>
"""


def make_L7_self_jinja():
    """Render the dashboard template for the self alerts.

    Sections come from the template variable ``prj``, defaulting to a quoted
    list of every project in ``PRJ``; the external-alerts fragment is appended.
    """
    quoted_projects = ["'{}'".format(prj) for prj in PRJ]
    placeholders = {
        "SECT": "prj",
        "ALLSECT": "[{}]".format(",".join(quoted_projects)),
        "NAMERE": "L7_alerts\.self\.",
        "SUFF": external_alerts,
    }
    return Jinja.format(**placeholders)


def make_L7_sd_test_jinja():
    """Render the dashboard template for the sd test alerts.

    Sections come from the template variable ``sections`` with a fixed
    default list; the global-sd-alerts fragment is appended.
    """
    placeholders = {
        "SECT": "sections",
        "ALLSECT": "['sd_test','']",
        "NAMERE": "L7_sd_test_alerts\.",
        "SUFF": global_sd_alerts,
    }
    return Jinja.format(**placeholders)


def generate_alerts(alerts, prefix):
    """Write an alert update request to ``<prefix>.json``.

    Args:
        alerts: JSON-serialisable list of alert dicts.
        prefix: stored in the request under ``"prefix"`` and also used as the
            output file name (with ``.json`` appended).

    The file receives the request as JSON indented by 2, followed by a
    newline; an existing file is overwritten.
    """
    update_request = {
        "prefix": prefix,
        "alerts": alerts
    }

    with open(prefix + ".json", "w") as f:
        # f.write() emits exactly what ``print >> f, ...`` did (text plus a
        # trailing newline) but, unlike the print-chevron statement, also
        # works when the script is run under Python 3.
        f.write(json.dumps(update_request, indent=2) + "\n")


def main():
    """Regenerate all alert JSON files and dashboard templates.

    Writes four files, using paths relative to the current working directory:
    ``L7_alerts.json``, ``L7_self.jinja``, ``L7_sd_test_alerts.json`` and
    ``L7_sd_test.jinja``.
    """
    generate_alerts(make_self_alerts(), "L7_alerts")

    # f.write() with an explicit newline emits the same text as the former
    # ``print >> f, ...`` but is also valid under Python 3.
    with open("L7_self.jinja", "w") as f:
        f.write(make_L7_self_jinja() + "\n")

    generate_alerts(make_sd_test_alerts(), "L7_sd_test_alerts")
    with open("L7_sd_test.jinja", "w") as f:
        f.write(make_L7_sd_test_jinja() + "\n")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
