#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
Мониторинг "на хостах установлены правильные версии пакетов (как написано в ZK)".

Смотрит на конфиг приложений,
по кондукторным группам получает списки хостов,
берет текущие эталонные версии приложений (знает про лимтесты),
берет установленные версии, которые записал в ZK скрипт dt-write-versions-to-zk

Все сравнивает и отсылает события в juggler.

По -v пишет подробный вывод
По -a -- обрабатывает только указанное приложение

"""

import argparse
import datetime
import dateutil.parser
import direct_juggler.juggler as dj
import os
import re
import requests
import yaml
from kazoo.client import KazooClient
from kazoo.exceptions import NoNodeError

# For the first n minutes after a version is written to ZK, a version mismatch
# is not treated as a problem; details: st/DIRECT-119899
SILENCE_DURATION_IN_MINUTE = 20

# ZooKeeper connection settings. The same retry policy dict is passed to the
# client both as connection_retry and as command_retry.
ZK_HOSTS = "ppcback01f.yandex.ru,ppcback01e.yandex.ru,ppcback01i.yandex.ru"
ZK_TIMEOUT = 10
ZK_RETRY = {"max_tries": 1, "delay": 1, "max_jitter": 1, "backoff": 1, "ignore_expire": False}
# Module-level client shared by all helpers below; it is only constructed here,
# the connection itself is started and stopped in run().
zkh = KazooClient(hosts=ZK_HOSTS, timeout=ZK_TIMEOUT, connection_retry=ZK_RETRY, command_retry=ZK_RETRY)

# YAML file with the applications config; its top-level 'apps' key is read by load_apps_conf().
APPS_CONFIG_PATH = '/etc/yandex-direct/direct-apps.conf.yaml'

# ZooKeeper paths. The two LIMTEST_*_ZK_LOCATION values are templates that take
# a limtest name; the application name is appended by the callers.
HOSTS_ZK_LOCATION = '/direct/versions_current'
LIMTEST_LOCATION = '/direct/limtest'
LIMTEST_HOSTS_ZK_LOCATION = "/direct/limtest/%s/hosts"
LIMTEST_VERSIONS_ZK_LOCATION = "/direct/limtest/%s/versions"
PROD_VERSIONS_ZK_LOCATION = "/direct/versions"

# Prefix put into juggler event descriptions so the event source can be identified.
DESCRIPTION_PREFIX = 'from: %s' % os.path.basename(__file__)

# Parsed command-line options; filled in by run(), read by the check functions.
opts = None


def get_hosts_from_conductor(groups, timeout=30):
    """
    Fetch the hosts of the given conductor groups.

    One HTTP request per group is sent to the conductor ``groups2hosts`` API;
    the response body is read as a newline-separated list of host names.

    :param groups: iterable of conductor group names
    :param timeout: per-request timeout in seconds, passed to ``requests.get``
    :return: list of ``(host, group)`` tuples, in the order they were received
    :raises requests.RequestException: on connection problems, on timeout,
        or when conductor answers with an HTTP error status
    """
    hosts = []
    for group in groups:
        response = requests.get(
            "http://c.yandex-team.ru/api/%s/%s" % ('groups2hosts', group),
            timeout=timeout
        )
        # Without this check the body of an error page would be parsed
        # as if it were a list of host names.
        response.raise_for_status()
        for line in response.content.split("\n"):
            # strip() also removes a stray "\r" and drops whitespace-only lines
            host = line.strip()
            if host:
                hosts.append((host, group))
    return hosts


def get_zk_dir(app):
    """Return the ZooKeeper path under which the hosts of *app* are registered."""
    path_parts = (HOSTS_ZK_LOCATION, app)
    return "%s/%s" % path_parts

def get_hosts_from_zk(app):
    """
    Return the hosts registered for *app* in ZooKeeper.

    The hosts are the child node names of the application's directory;
    an empty list is returned when that directory does not exist.
    """
    app_dir = get_zk_dir(app)
    try:
        children = zkh.get_children(app_dir)
    except NoNodeError:
        children = []
    return children


def extract_version_from_zk_text(text):
    """
    Return the version stored in a ZooKeeper node text, i.e. its first line.

    Everything from the first newline on is ignored. A text without any
    newline is returned unchanged (slicing by the result of ``find`` would
    cut off its last character, because ``find`` returns -1 in that case).
    """
    return text.partition("\n")[0]


def extract_version_update_time_from_zk_text(text):
    """
    Return the update time recorded in a ZooKeeper node text, or None.

    The time is looked up as the non-whitespace run following ``time=`` and
    parsed with ``dateutil.parser.parse``. None is returned when the text has
    no ``time=`` marker or when the value cannot be parsed; a parse failure
    is reported on stdout and is deliberately not propagated.
    """
    # Raw string: "\S" is a regex class, not a string escape.
    match = re.search(r"time=(\S*)", text)
    if match is None:
        return None

    try:
        return dateutil.parser.parse(match.group(1))
    except Exception as e:
        print("got exception when parse time: %s" % e)
        return None


def get_version_on_host(app, host):
    """
    Return the version of *app* currently reported for *host*.

    The version is the first line of the host's ``dpkg`` node in ZooKeeper;
    the placeholder ``'no_version'`` is returned when that node is missing.
    """
    node_path = "%s/%s/%s/dpkg" % (HOSTS_ZK_LOCATION, app, host)
    try:
        node_text = zkh.get(node_path)[0]
    except NoNodeError:
        return 'no_version'
    return extract_version_from_zk_text(node_text)


def load_apps_conf(config_path=APPS_CONFIG_PATH):
    """
    Read the applications config and return the value of its 'apps' key.

    :param config_path: path to the YAML config file
    """
    with open(config_path, "r") as apps_fd:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and is deprecated in newer PyYAML; consider
        # yaml.safe_load if the config only contains plain YAML.
        parsed_conf = yaml.load(apps_fd)
    return parsed_conf['apps']


def delete_hosts(hosts_set, hosts_to_delete):
    """
    Remove every host of *hosts_to_delete* from *hosts_set*, in place.

    Hosts that are not present in *hosts_set* are skipped silently.

    :param hosts_set: mutable collection with a ``remove`` method (a set in
        this file; a list works as well)
    :param hosts_to_delete: iterable of hosts to remove
    """
    for host in hosts_to_delete:
        try:
            hosts_set.remove(host)
        except (KeyError, ValueError):
            # KeyError: missing in a set, ValueError: missing in a list.
            # Anything else is a real error and must not be hidden.
            pass


def get_limtest_hosts(limtest, app):
    """
    Return the hosts assigned to *limtest* for *app*.

    The hosts are read from the limtest's hosts node in ZooKeeper, one host
    per line. Surrounding whitespace is removed and blank lines are skipped.
    Reading is best effort: any failure (for example a missing node) yields
    an empty list.
    """
    try:
        raw_hosts = zkh.get("%s/%s" % (LIMTEST_HOSTS_ZK_LOCATION % limtest, app))[0]
        # Strip first and filter afterwards, so that whitespace-only lines
        # do not end up as empty host names.
        stripped_lines = (line.strip() for line in raw_hosts.split("\n"))
        return [host for host in stripped_lines if host]
    except Exception:
        return []


def get_limtest_version(limtest, app):
    """
    Return the version *app* is locked on for *limtest*.

    The value is the stripped content of the limtest's versions node in
    ZooKeeper. Reading is best effort: on any failure (for example a missing
    node) an empty list is returned. The only caller in this file tests the
    result for truthiness, so the empty list works as "no locked version".
    """
    try:
        return zkh.get("%s/%s" % (LIMTEST_VERSIONS_ZK_LOCATION % limtest, app))[0].strip()
    except Exception:
        # Kept as [] (not '' or None) for backward compatibility.
        return []


def get_limtests():
    """Return the names of all limtests, i.e. the child nodes of the limtest root in ZooKeeper."""
    limtest_names = zkh.get_children(LIMTEST_LOCATION)
    return limtest_names


def get_current_zk_version(app):
    """Return the reference (production) version of *app* stored in ZooKeeper."""
    node_path = "%s/%s" % (PROD_VERSIONS_ZK_LOCATION, app)
    node_text = zkh.get(node_path)[0]
    return extract_version_from_zk_text(node_text.strip())


def get_current_zk_version_with_update_time(app):
    """
    Return ``(version, update_time)`` for the reference version of *app*.

    Both values are extracted from the same ZooKeeper node text;
    ``update_time`` is None when the text carries no parsable time.
    """
    node_path = "%s/%s" % (PROD_VERSIONS_ZK_LOCATION, app)
    zk_text = zkh.get(node_path)[0].strip()
    version = extract_version_from_zk_text(zk_text)
    update_time = extract_version_update_time_from_zk_text(zk_text)
    return version, update_time


def check_conductor_vs_zk(**kwargs):
    """
    Compare the host list from conductor with the host list from ZooKeeper.

    Keyword arguments used (any others are ignored):
        hosts_conductor -- set of hosts known to conductor
        hosts_zk        -- set of hosts registered in ZooKeeper
        app             -- application name, used to build the cleanup hint
        hosts2group     -- mapping host -> conductor group, used in verbose output

    Returns a list of CRIT messages; the list is empty when both sets match.
    With the verbose option the differing hosts are printed to stdout.
    """
    hosts_conductor = kwargs['hosts_conductor']
    hosts_zk = kwargs['hosts_zk']
    app = kwargs['app']
    hosts2group = kwargs['hosts2group']
    crit_messages = []

    if hosts_conductor == hosts_zk:
        return crit_messages

    absent_hosts_from_c = hosts_conductor - hosts_zk
    if absent_hosts_from_c:
        if opts.verbose:
            # sorted by host name, printed as "group: host"
            print(u"### check_conductor_vs_zk / hosts presented in conductor but not in zk:\n%s\n" % u"\n".join([
                "%s: %s" % (group, host) for host, group in sorted([
                    (host, hosts2group[host]) for host in absent_hosts_from_c
                ])
            ]))
        crit_messages.append("%d hosts presented in conductor but not in zk" % len(absent_hosts_from_c))

    absent_hosts_from_zk = hosts_zk - hosts_conductor
    if absent_hosts_from_zk:
        if opts.verbose:
            print(u"### check_conductor_vs_zk / hosts presented in zk but not in conductor:\n%s\n" % u"\n".join(sorted(absent_hosts_from_zk)))
            # ready-to-paste command that removes the stale nodes
            print(u"You can delete them using this command:\ndelete-version-node %s" % " ".join([
                os.path.join(get_zk_dir(app), host) for host in sorted(absent_hosts_from_zk)
            ]))
        crit_messages.append("%d hosts presented in zk but not in conductor" % len(absent_hosts_from_zk))

    return crit_messages


def check_zookeeper_vs_deb(**kwargs):
    """
    Check that production hosts have the version from ZooKeeper installed.

    Keyword arguments used (any others are ignored):
        hosts_conductor -- iterable of hosts to check
        app             -- application name

    Returns a dict host -> CRIT message, with None for hosts that are fine.
    While the reference version was updated less than
    SILENCE_DURATION_IN_MINUTE minutes ago the comparison is skipped and all
    hosts are reported as fine. When the update time is unknown the
    comparison is always performed.
    """
    hosts_conductor = kwargs['hosts_conductor']
    app = kwargs['app']
    crit_messages = {h: None for h in hosts_conductor}

    zk_version, update_time = get_current_zk_version_with_update_time(app)
    # update_time is None when the node has no parsable "time="; it must be
    # checked before its tzinfo is read.
    if update_time is not None:
        # "now" in the same timezone (or the same naivety) as update_time
        now_time = datetime.datetime.now(update_time.tzinfo)
        if (update_time + datetime.timedelta(minutes=SILENCE_DURATION_IN_MINUTE)) >= now_time:
            print("skip check_zookeeper_vs_deb for %s, %s minutes haven't passed yet after updating zk_version" % (app, SILENCE_DURATION_IN_MINUTE))
            return crit_messages

    bad_hosts = []
    for host in hosts_conductor:
        host_version = get_version_on_host(app, host)
        if host_version != zk_version:
            crit_messages[host] = "version on host '%s' differs from version in zk: '%s'" % (host_version, zk_version)
            bad_hosts.append((host, host_version))

    if bad_hosts and opts.verbose:
        print(u"### check_zookeeper_vs_deb / current zk version='%s', hosts with wrong version:\n%s" % (
            zk_version, "\n".join("%s='%s'" % (host, version) for host, version in bad_hosts)
        ))

    return crit_messages

def check_limtest_zookeeper_vs_deb(**kwargs):
    """
    Check the versions installed on limtest hosts.

    Hosts of a locked limtest must have the version from the limtest's
    ZooKeeper node; hosts of an unlocked limtest (no limtest version) must
    have the production version.

    Keyword arguments used (any others are ignored):
        app           -- application name
        limtests_data -- mapping limtest -> {'hosts': [...], 'version': ...}

    Returns a list with at most one CRIT message that carries the total
    number of limtest hosts with a wrong version. With the verbose option the
    offending hosts are printed per limtest.
    """
    app = kwargs['app']
    limtests_data = kwargs['limtests_data']
    zk_version = get_current_zk_version(app)

    crit_messages = []
    bad_limtest_hosts = []
    for limtest in limtests_data:
        limtest_version = limtests_data[limtest]['version']
        # an empty limtest version means "unlocked": compare with production
        version_to_compare = limtest_version or zk_version

        # bad hosts that belong to this particular limtest
        bad_limtest_hosts_part = []
        for limtest_host in limtests_data[limtest]['hosts']:
            host_version = get_version_on_host(app, limtest_host)
            if host_version != version_to_compare:
                bad_limtest_hosts_part.append((limtest_host, host_version))

        bad_limtest_hosts.extend(bad_limtest_hosts_part)

        if bad_limtest_hosts_part and opts.verbose:
            if limtest_version:
                template = u"locked on version '%s', host with wrong version: %s"
            else:
                template = u"unlocked, production version '%s', host with wrong version: %s"
            # Only this limtest's hosts are listed: the cumulative list would
            # attribute hosts of other limtests to this limtest and version.
            print((u"### check_limtest_zookeeper_vs_deb / %s: " + template) % (
                limtest,
                version_to_compare,
                ", ".join("%s='%s'" % (host, version) for host, version in bad_limtest_hosts_part)
            ))

    if bad_limtest_hosts:
        crit_messages.append("%d limtest hosts with wrong version" % len(bad_limtest_hosts))

    return crit_messages




def parse_options():
    """
    Parse the command line and return the resulting namespace.

    Recognised options are -v/--verbose (flag) and -a/--app (repeatable,
    collected into ``apps``). Any unrecognised argument terminates the script
    through ``parser.error``.
    """
    description = (
        "Скрипт, отправляющий juggler-проверки conductor_vs_zk и zookeeper_vs_deb.\n"
        "zookeeper_vs_deb - проверяем, что на продакшеновых хостах стоит версия из зукипера.\n"
        "conductor_vs_zk - сравниваем список хостов из кондуктора со списком из зукипера."
    )
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument('-v', '--verbose', action='store_true', help="подробный вывод")
    parser.add_argument(
        '-a', '--app',
        dest="apps",
        action='append',
        default=[],
        help="приложения для проверки (java-api5 и т.д.), каждое приложение отдельным параметром",
    )

    parsed, unknown = parser.parse_known_args()
    if unknown:
        parser.error("There are unknown parameters")

    return parsed


# Table of the checks executed by run() for every application.
# Keys of each entry:
#   'service'                   -- juggler service name template; '%s' is
#                                  replaced with the application name
#   'cmd'                       -- check function, called with keyword
#                                  arguments only
#   'ignore-feature-name'       -- the check is skipped for an application
#                                  whose 'ignore-features' config list
#                                  contains this name
#   'send_events_for_each_host' -- optional; when true the check result is a
#                                  dict host -> message-or-None and one event
#                                  per host is sent, otherwise the result is a
#                                  list of messages folded into a single event
CHECKS = [
    {
        'service': 'conductor_vs_zk.%s',
        'cmd': check_conductor_vs_zk,
        'ignore-feature-name': 'conductor-vs-zk',
    },
    {
        'service': 'zookeeper_vs_deb.%s',
        'cmd': check_zookeeper_vs_deb,
        'ignore-feature-name': 'zk-vs-deb',
        'send_events_for_each_host': True,
    },
    {
        # shares its ignore-feature name with the production zk-vs-deb check
        'service': 'limtest_zookeeper_vs_deb.%s',
        'cmd': check_limtest_zookeeper_vs_deb,
        'ignore-feature-name': 'zk-vs-deb',
    }
]


def _queue_check_result(check, app, crit_messages):
    """Convert the result of one check for one application into juggler events and queue them."""
    service = check['service'] % app

    if check.get('send_events_for_each_host', False):
        # per-host result: dict host -> message, None means the host is fine
        events = []
        for host in crit_messages:
            event = {'service': service, 'host': host}
            if crit_messages[host] is None:
                event['status'] = 'OK'
            else:
                event['status'] = 'CRIT'
                event['description'] = crit_messages[host]
            events.append(event)
        dj.queue_events(events)
    elif not crit_messages:
        dj.queue_events([{'service': service, 'status': 'OK', 'description': DESCRIPTION_PREFIX}])
    else:
        dj.queue_events([{
            'service': service,
            'status': 'CRIT',
            'description': "; ".join([DESCRIPTION_PREFIX, "-v for details"] + crit_messages)
        }])


def run():
    """
    Entry point: run every check from CHECKS for every application.

    Parses the command line into the module-level ``opts``, loads the
    applications config, connects to ZooKeeper and then, per application:

    1. collects hosts from conductor and from ZooKeeper and the limtest data
       (limtest hosts are removed from the ZooKeeper host set because they
       are checked separately);
    2. runs the checks and queues the resulting juggler events.

    A failure in step 1 queues a CRIT event for every check of that
    application and skips step 2 for it. A failure inside a single check
    queues a CRIT event for that check only. Applications without
    'conductor_groups' are skipped. The ZooKeeper connection is closed even
    when an unexpected error escapes.
    """
    global opts
    opts = parse_options()

    apps_conf = load_apps_conf()
    zkh.start()
    try:
        limtests = get_limtests()

        apps_to_check = apps_conf.keys()
        if opts.apps:
            apps_to_check = opts.apps

        for app in apps_to_check:
            try:
                if not apps_conf[app].get('conductor_groups', []):
                    continue

                if opts.verbose:
                    print("#### %s" % app)

                hosts_conductor_data = get_hosts_from_conductor(apps_conf[app]['conductor_groups'])
                hosts2group = {host: group for host, group in hosts_conductor_data}
                hosts_conductor = set(host for host, _ in hosts_conductor_data)
                hosts_zk = set(get_hosts_from_zk(app))

                # limtest hosts come from ZooKeeper and are checked separately,
                # so they are removed from the regular ZooKeeper host set
                limtests_data = {}
                for limtest in limtests:
                    limtest_hosts = get_limtest_hosts(limtest, app)
                    if limtest_hosts:
                        limtests_data[limtest] = {'hosts': limtest_hosts, 'version': get_limtest_version(limtest, app)}

                for limtest in limtests_data:
                    delete_hosts(hosts_zk, limtests_data[limtest]['hosts'])

            except Exception as e:
                for check in CHECKS:
                    dj.queue_events([{
                        'service': check['service'] % app,
                        'status': 'CRIT',
                        'description': "; ".join([DESCRIPTION_PREFIX, 'exception occured during preparation: %s %s' % (type(e), e)])
                    }])
                # The per-app data is incomplete (or left over from the previous
                # application): running the checks would report on wrong hosts
                # and could overwrite the CRIT events queued above.
                continue

            for check in CHECKS:
                try:
                    if check['ignore-feature-name'] in apps_conf[app].get('ignore-features', []):
                        continue

                    crit_messages = check['cmd'](
                        hosts_conductor=hosts_conductor,
                        hosts_zk=hosts_zk,
                        limtests_data=limtests_data,
                        app=app,
                        hosts2group=hosts2group,
                    )
                    _queue_check_result(check, app, crit_messages)

                except Exception as e:
                    dj.queue_events([{
                        'service': check['service'] % app,
                        'status': 'CRIT',
                        'description': "; ".join([DESCRIPTION_PREFIX, 'exception occured during checking: %s %s' % (type(e), e)])
                    }])

            if opts.verbose:
                print("-" * 40)
    finally:
        zkh.stop()


# Run the monitoring only when the file is executed as a script, not on import.
if __name__ == '__main__':
    run()
