#!/usr/bin/python
#-*- coding: utf-8 -*-

from juggler_sdk import JugglerApi, Check, Child, FlapOptions, NotificationOptions
from directadmon.juggler_helpers import AggrChecks, hash_merge, juggler_merge, hash_replace, cgroups_to_children, jdumps

import argparse
import copy
import sys
import logging
import logging.handlers
# Attach a no-op handler to this module's logger.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Three hours, in seconds. Not referenced in the visible part of this file.
SOLOMON_TTL = 3600*3

# Base keyword arguments for a check: logic_or aggregator, 15-minute ttl,
# the common tags and a single 'push' notification.
DEFAULT_CHECK = {
    'aggregator': 'logic_or',
    'ttl': 60*15,
    'tags': ['directadmin_TV', 'direct_group_sre'],
    'notifications': [
        # description (Russian): "service that stores the history of checks"
        NotificationOptions(template_name='push', template_kwargs=dict(push_url='http://juggler-history.da.yandex.ru/save_notifications'), description=u'сервис для хранения истории по проверкам')
    ],
}

# Variants of DEFAULT_CHECK carrying app-duty tags.
# NOTE(review): one uses juggler_merge and the other hash_merge; how each helper
# treats the 'tags' list (append vs. replace) is not visible here -- confirm.
APPDUTY_CHECK = juggler_merge(DEFAULT_CHECK, {'tags': ['direct-appduty-tv']})
APPDUTY_ONLY_CHECK_NP = hash_merge(DEFAULT_CHECK, {'tags': ['direct-appduty-tv_np']})

# Template for "calling" checks that only notify the chat and create tickets.
# Built from a deep copy, so the in-place changes below do not touch DEFAULT_CHECK.
CALLS_CHAT_ONLY = copy.deepcopy(DEFAULT_CHECK)
# 10 minutes (and 10 checks): a small ttl is no longer relevant here, because almost
# all calling checks are centralized ones with logic_and, and the unavailability of
# a single host does not affect them.
CALLS_CHAT_ONLY['ttl'] = 60*10
CALLS_CHAT_ONLY['tags'].append('directadmin.calls')
CALLS_CHAT_ONLY['notifications'].extend([
    # description (Russian): "chat for calling checks"
    NotificationOptions(
        template_name='on_status_change', template_kwargs=dict(status=['CRIT'], method=['telegram'], login='Direct-Monitoring-Calls'), description=u'чат для звонящих проверок'
    ),
    # description (Russian): "tickets for every call"
    NotificationOptions(
        template_name='startrek', template_kwargs=dict(queue='DIRECTMON', status=['CRIT'], components='directadmin.calls'),
        description=u'тикеты по каждому звонку'
    ),
])

# Calling check without a flaps_config: CALLS_CHAT_ONLY plus a phone escalation.
# NOTE(review): hash_merge(X, {}) is presumably used to obtain an independent copy;
# if it does not copy the nested 'notifications' list, the append below would also
# modify CALLS_CHAT_ONLY -- confirm against the helper.
CALLING_CHECK_NO_FLAP = hash_merge(CALLS_CHAT_ONLY, {})
# description (Russian): "calls"
CALLING_CHECK_NO_FLAP['notifications'].append(NotificationOptions(template_name='phone_escalation', template_kwargs={'logins': ['@svc_direct-app-duty:direct-production-duty', 'robot-direct-admin', 'pe4kin', 'yukaba'], 'delay': 60, 'on_success_next_call_delay': 90}, description=u'звонки'))

# Same as above with flap options stable=3 minutes, critical=10 minutes.
CALLING_CHECK = hash_merge(CALLING_CHECK_NO_FLAP, {'flaps_config': FlapOptions(stable=60*3, critical=60*10)})

# MySQL flavour of the calling check; currently the escalation settings are the
# same as in CALLING_CHECK_NO_FLAP.
CALLING_CHECK_NO_FLAP_MYSQL = hash_merge(CALLS_CHAT_ONLY, {})
# description (Russian): "calls"
CALLING_CHECK_NO_FLAP_MYSQL['notifications'].append(NotificationOptions(template_name='phone_escalation', template_kwargs={'logins': ['@svc_direct-app-duty:direct-production-duty','robot-direct-admin', 'pe4kin', 'yukaba'], 'delay': 60, 'on_success_next_call_delay': 90}, description=u'звонки'))

# MySQL calling check with shorter flap options: stable=90 seconds, critical=3 minutes.
CALLING_CHECK_MYSQL = hash_merge(CALLING_CHECK_NO_FLAP_MYSQL, {'flaps_config': FlapOptions(stable=90, critical=60*3)})

# Aggregator settings with one limit covering days 1-7 and hours 0-23:
# warn threshold 0, crit threshold '20%'.
GT_20PCT_IS_CRIT = dict(
    aggregator='timed_more_than_limit_is_problem',
    aggregator_kwargs=dict(
        limits=[
            dict(day_start=1, day_end=7, time_start=0, time_end=23, warn=0, crit='20%'),
        ],
    ),
)

# Tag put on aggregation children.
AGGR_CHILD_TAGS = ['directadmin.aggregation_child']
# Tags of the ready_for_operation aggregate: its own tag followed by a copy of
# the default check tags (a new list, DEFAULT_CHECK['tags'] is not modified).
READY_TAGS = ['directadmin.ready_for_operation']
READY_TAGS.extend(DEFAULT_CHECK['tags'])

# logic_and aggregator with downtimes_mode set to 'ignore'.
LOGIC_AND = dict(
    aggregator='logic_and',
    aggregator_kwargs=dict(downtimes_mode='ignore'),
)

# Unreach settings tied to the ':ready_for_operation' check (hold of 900),
# with unreach_mode 'skip'.
UNREACH_READY = dict(
    aggregator_kwargs=dict(unreach_mode='skip'),
    unreach_service=[dict(check=':ready_for_operation', hold=900)],
)

# The other unreach variants are most likely not needed, UNREACH_READY is enough.
##UNREACH_PING = {'aggregator_kwargs': {'unreach_mode': 'skip'}, 'unreach_service': 'UNREACHABLE'}
##UNREACH_MYSQL = {'aggregator_kwargs': {'unreach_mode': 'skip'}, 'unreach_service': lambda suff: 'mysql_alive_' + suff }
##UNREACH_MONGO = {'aggregator_kwargs': {'unreach_mode': 'skip'}, 'unreach_service': lambda suff: 'mongodb-%s_alive' % (suff,) }

# Time-dependent limits: crit threshold 0 on days 1-5 between 10 and 20,
# crit threshold '101%' for the remaining hours of days 1-5 and for days 6-7.
ONLY_DAY = dict(
    aggregator='timed_more_than_limit_is_problem',
    aggregator_kwargs=dict(
        ignore_nodata='no',
        limits=[
            # days 1-5, start 10 == 10:00:00, end 20 == 20:59:59
            dict(day_start=1, day_end=5, time_start=10, time_end=20, warn=0, crit=0),
            # days 1-5, from 21:00:00 to 09:59:59
            dict(day_start=1, day_end=5, time_start=21, time_end=9, warn=0, crit='101%'),
            # days 6-7, hours 0-23
            dict(day_start=6, day_end=7, time_start=0, time_end=23, warn=0, crit='101%'),
        ],
    ),
)

# Used by append_mysql_call().
GT_65PCT_IS_CRIT = dict(
    aggregator='more_than_limit_is_crit',
    aggregator_kwargs=dict(downtimes_mode='ignore', percent=65),
)

# ONLY_DAY variant: the first (daytime) limit uses crit '65%' and downtimes are ignored.
# Works on a deep copy, so ONLY_DAY itself stays untouched.
GT_65PCT_IS_CRIT_DAY = copy.deepcopy(ONLY_DAY)
GT_65PCT_IS_CRIT_DAY['aggregator_kwargs'].update(downtimes_mode='ignore')
GT_65PCT_IS_CRIT_DAY['aggregator_kwargs']['limits'][0].update(crit='65%')

# Services aggregated by DirectAdminChecks.append_common() by default.
COMMON_CHECKS = [
    'dns_local',
    'ntp_stratum',
    'unispace',
    # 'iptruler',
    # 'eth_speed',
]
COMMON_CHECKS_MDB = []

# Services aggregated by DirectAdminChecks.append_walle() by default.
WALLE_CHECKS = [
    'walle_bmc',
    'walle_cpu',
    'walle_cpu_capping',
    'walle_disk',
    'walle_fs_check',
    'walle_link',
    'walle_memory',
    'walle_meta',
    'walle_reboots',
    'walle_tainted_kernel',
]
# Wall-e services for which the 'directadmin_TV' tag is dropped.
WALLE_NOTV = [
    'walle_cpu_capping',
    'walle_fs_check',
]

# Per-instance MySQL service prefixes; the aggregate suffix is appended to each.
MYSQL_COMMON_CHECKS = [
    # 'mysql_checksum_active_',
    # 'mysql_checksum_errors_',
    'mysql_alive_',
    'mysql_monitor_age_',
    'mysql_trx_max_age_',
    'threads_running_',
    'threads_connected_',
]

# Replication service prefixes.
MYSQL_COMMON_SLAVE_CHECKS = [
    'mysql_slave_behind_',
    'mysql_slave_running_',
]

# Service prefixes added for xtradb clusters.
MYSQL_XTRADB_CHECKS = [
    'xtradb_wsrep_cluster_size_',
    'xtradb_wsrep_cluster_status_',
    'xtradb_wsrep_connected_',
    # 'xtradb_wsrep_local_state_',  # state and state_comment effectively duplicate each other
    'xtradb_wsrep_local_state_comment_',
]

MYSQL_HEALTH_HOST = 'direct.prod_mysql-health'
DISASTER_ALERTS_HOST = 'direct.disaster_alerts'


def _unreach_kwargs(skip_rfo):
    """Pick the unreach settings for an aggregate.

    Returns the shared UNREACH_READY template unless ``skip_rfo`` is true; in
    that case returns a fresh dict that keeps ``unreach_mode: skip`` but lists
    no unreach services.
    """
    if skip_rfo:
        return {'aggregator_kwargs': {'unreach_mode': 'skip'}, 'unreach_service': []}
    return UNREACH_READY


def _service_with_suffix(prefix):
    """Return a callable that maps an aggregate suffix to ``prefix + suffix``.

    The prefix is bound per call, so the callable keeps its own prefix even
    when it is invoked after the caller has moved on to another service name.
    """
    return lambda aggr_suff: prefix + aggr_suff


class DirectAdminChecks(AggrChecks):
    """
    Frequently used directadmin checks.

    It would be nice for every method to accept children in the
    make_aggr_checks format, the same way AggrChecks does.
    """

    def __init__(self, *args, **kwargs):
        """Fill in directadmin defaults and delegate to AggrChecks.

        ``aggr_child_tags`` defaults to AGGR_CHILD_TAGS and ``default_kwargs``
        defaults to DEFAULT_CHECK; for any ``namespace`` other than
        'direct.prod' the default check carries no notifications.
        """
        # Work on a copy: assigning 'notifications' on DEFAULT_CHECK itself would
        # strip the notifications for everything that uses the constant later.
        default_check = dict(DEFAULT_CHECK)
        if kwargs.get('namespace') != 'direct.prod':
            default_check['notifications'] = []
        kwargs.setdefault('aggr_child_tags', AGGR_CHILD_TAGS)
        kwargs.setdefault('default_kwargs', default_check)
        super(DirectAdminChecks, self).__init__(*args, **kwargs)

    def append_common(self, checks=COMMON_CHECKS, children=None, rfo_children=None, common_children=None, skip_rfo_on_common=False, skip_ready_for_operation=False):
        """Add the ready_for_operation aggregate and the common host checks.

        :param checks: service names to aggregate (COMMON_CHECKS by default).
        :param children: children used when no more specific list is given.
        :param rfo_children: children of the 'ready_for_operation' aggregate;
            falls back to ``children`` when empty or None.
        :param common_children: children of the common checks and of
            'yandex-ansible-pull'; falls back to ``children`` when empty or None.
        :param skip_rfo_on_common: when true, the common checks are not tied to
            ':ready_for_operation' as their unreach service.
        :param skip_ready_for_operation: when true, the 'yandex-ansible-pull'
            aggregate is not added (the 'ready_for_operation' aggregate itself
            is always added).
        """
        # Active ping + ssh check; can be removed after a complete switch to ready_for_op.
        self.append_aggr(service='ready_for_operation', children=rfo_children or children,
                         ttl=180, tags=READY_TAGS, **GT_20PCT_IS_CRIT)

        # Hardware checks.
        # TODO: rework into hw-watcher checks, the relevance of these is already doubtful.
        children = common_children or children
        for service in checks:
            self.append_aggr(service=service, children=children, **_unreach_kwargs(skip_rfo_on_common))

        if not skip_ready_for_operation:
            # GT_20PCT_IS_CRIT is deliberately passed both first and last, as before.
            self.append_aggr(service='yandex-ansible-pull', children=children, ttl=3600*2,
                             **hash_merge(GT_20PCT_IS_CRIT, _unreach_kwargs(skip_rfo_on_common), GT_20PCT_IS_CRIT))

    def append_walle(self, checks=WALLE_CHECKS, children=None, skip_rfo=False):
        """Add one aggregate per wall-e (hw-watcher) hardware check.

        :param checks: service names to aggregate (WALLE_CHECKS by default).
        :param children: children passed to every aggregate.
        :param skip_rfo: when true, the checks are not tied to
            ':ready_for_operation' as their unreach service.
        """
        for service in checks:
            tags = self.default_check['tags']
            if service in WALLE_NOTV:
                # These services get every default tag except 'directadmin_TV'.
                tags = [x for x in tags if x != 'directadmin_TV']
            self.append_aggr(service=service, children=children, tags=tags, **_unreach_kwargs(skip_rfo))

    def append_mysql(self, children=None, lfw_children=None, spec_children=None, cluster_settings_children=None,
                     zkguard_children=None, skip_grants=False, skip_backup_success=False, skip_slave_checks=False,
                     xtradb=False, mixed_mode=False, skip_check_config=False, skip_mysql_check_tests=False):
        """Add the standard set of MySQL aggregates.

        :param children: children for most checks; None means they are taken
            from self.children.
        :param lfw_children: children of the 'lfw-' / 'lm-schema-' checks;
            defaults to ``children``, an empty non-None value disables them.
        :param spec_children: children of 'mysql_check_tests_'; defaults to
            ``children``, an empty non-None value disables the check.
        :param cluster_settings_children: children of 'mysql_check_config_';
            defaults to ``children``, an empty non-None value disables the check.
        :param zkguard_children: children of 'zkguard.working.'; passed as is
            (not defaulted to ``children``), an empty non-None value disables
            the check.
        :param skip_grants: do not add 'mysql_grants' (a kludge for graphite,
            which has no grants check configured).
        :param skip_backup_success: do not add 'backup_success'.
        :param skip_slave_checks: do not add MYSQL_COMMON_SLAVE_CHECKS.
        :param xtradb: add MYSQL_XTRADB_CHECKS and skip the lfw/lm checks.
        :param mixed_mode: merge LOGIC_AND into the per-instance child kwargs.
        :param skip_check_config: do not add 'mysql_check_config_'.
        :param skip_mysql_check_tests: do not add 'mysql_check_tests_'.
        """
        # children == None means that children will be taken from self.children;
        # to keep that from happening implicitly for lfw_children and friends,
        # resolve them right at the start.
        if lfw_children is None:
            lfw_children = children
        if spec_children is None:
            spec_children = children
        if cluster_settings_children is None:
            cluster_settings_children = children

        # MySQL checks that do not depend on the instance name.
        if not skip_grants:
            # execution_interval=120
            self.append_aggr_full(child_kwargs=dict(service='mysql_grants_file_age', ttl=120*15, **UNREACH_READY),
                                  aggr_kwargs=dict(service='mysql_grants'), children=children)

        # execution_interval=600, refresh_time must not be changed.
        # For this check to work correctly with unreach, mode: force_ok has to be set;
        # then ready-for-operation turns ppcstandby:backup_success into OK and the check
        # is OK on the whole shard through logic_and.
        if not skip_backup_success:
            self.append_aggr(service='backup_success', aggregator='logic_and', ttl=600*15, children=children,
                             **hash_merge(UNREACH_READY, {'aggregator_kwargs': {'unreach_mode': 'force_ok'}}))

        # Per-instance MySQL checks. Build a new list every time: extending the
        # module-level MYSQL_COMMON_CHECKS in place would add the slave checks
        # again on every call.
        services = list(MYSQL_COMMON_CHECKS)
        if xtradb:
            services.extend(MYSQL_XTRADB_CHECKS)
        if not skip_slave_checks:
            services.extend(MYSQL_COMMON_SLAVE_CHECKS)

        for service in services:
            self.append_aggr_full(child_kwargs=dict(service=_service_with_suffix(service),
                                                    **hash_merge(LOGIC_AND if mixed_mode else {}, UNREACH_READY)),
                                  aggr_kwargs=dict(service=service[:-1]), children=children)

        if not skip_check_config and (cluster_settings_children or cluster_settings_children is None):
            service = 'mysql_check_config_'
            self.append_aggr_full(child_kwargs=dict(service=_service_with_suffix(service), **UNREACH_READY),
                                  aggr_kwargs=dict(service=service[:-1]), children=cluster_settings_children)

        if not skip_mysql_check_tests and (spec_children or spec_children is None):
            service = 'mysql_check_tests_'
            self.append_aggr_full(child_kwargs=dict(service=_service_with_suffix(service), ttl=600*15, **UNREACH_READY),
                                  aggr_kwargs=dict(service=service[:-1]), children=spec_children)

        if zkguard_children or zkguard_children is None:
            service = 'zkguard.working.'
            self.append_aggr_full(child_kwargs=dict(service=_service_with_suffix(service), **UNREACH_READY),
                                  aggr_kwargs=dict(service=service[:-1]), children=zkguard_children)

        # lfw/lm checks do not exist on some databases (sandbox) and on xtradb.
        if not xtradb and (lfw_children or lfw_children is None):
            for service in ['lfw-', 'lm-schema-']:
                self.append_aggr_full(child_kwargs=dict(service=_service_with_suffix(service), **UNREACH_READY),
                                      aggr_kwargs=dict(service=service[:-1]), children=lfw_children)

    def append_mysql_call(self, **kwargs):
        """Add the phone-calling replication checks on host ``<aggr_host>.call``.

        :param kwargs: overrides applied on top of GT_65PCT_IS_CRIT
            (aggregator settings, children and so on).
        """
        # Mix the passed parameters into the defaults (the caller may have passed no
        # aggregator, children, etc.). Replace rather than merge, so that
        # aggregator_kwargs gets overwritten.
        kwargs = hash_replace(GT_65PCT_IS_CRIT, kwargs)

        # Requires yandex-du-mysql-monitor >= 3.80.
        # Created as direct.prod_mysql.call:mysql_slave_behind_ppclog, ...
        for service in ['mysql_slave_behind_', 'mysql_slave_running_']:
            child_kwargs = dict(service=_service_with_suffix(service),
                                host=self.aggr_host + '.call')

            # The merge order matters! First the common default (CALLING_CHECK), then the
            # passed arguments (aggr*, children), then the service name and host that
            # depend on this function.
            child_kwargs = hash_merge(CALLING_CHECK, kwargs, child_kwargs)
            self.append_aggr_single(**child_kwargs)


### directadmin helpers

def add_default_args(parser):
    """Register the command-line options shared by the juggler scripts.

    Adds --apply, -v/--verbosity (counted), -p/--pretty and -t/--token to the
    given argparse parser. Returns nothing.
    """
    switch = {'default': False, 'action': 'store_true'}

    # help: "apply all changes to juggler"
    parser.add_argument('--apply', help='применять все изменения в juggler', **switch)
    # help: "verbose output (1 - only changed checks, 3 - debug)"
    parser.add_argument('-v', '--verbosity', action='count', default=0,
                        help='подробный вывод (1 - только изменившиеся проверки, 3 - debug)')
    # help: "human-readable output of the checks' json configs (diff)"
    parser.add_argument('-p', '--pretty', help='удобочитаемый вывод json-конфигов (diff) проверок', **switch)
    parser.add_argument('-t', '--token', help='juggler api token', default='~/.juggler-token')


def get_dashboard_url(cluster_id, name):
    """Build the dashboard link descriptor for a MySQL cluster instance.

    Returns a dict with 'title', 'type' and 'url' keys; ``cluster_id`` and
    ``name`` are substituted as the ``cluster`` and ``instance`` query
    parameters of the URL without any escaping.
    """
    url_template = (
        'https://solomon.yandex-team.ru/?project=direct&dashboard=mdb-mysql'
        '&cluster={cluster_id}&instance={instance}&b=6h&e=&graphOnly='
    )
    dashboard = {'title': 'solomon MySQL dash', 'type': 'yasm'}
    dashboard['url'] = url_template.format(instance=name, cluster_id=cluster_id)
    return dashboard


def init_root_logger(args):
    """Configure the root logger to write to stdout and return it.

    The level of both the logger and the new handler is
    ``logging.INFO - args.verbosity``, so every -v lowers the threshold by one.
    A new stdout handler is added on every call.
    """
    level = logging.INFO - args.verbosity

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter('[%(asctime)s]\t%(message)s'))
    stdout_handler.setLevel(level)

    root = logging.getLogger()
    root.setLevel(level)
    root.addHandler(stdout_handler)
    return root


def get_namespace_host(filename):
    """Derive the (namespace, host) pair from a script file name.

    The namespace is the part before the first underscore; the host is the
    whole name with a trailing '.py' removed. Raises ValueError when the name
    contains no underscore.
    """
    namespace, rest = filename.split('_', 1)
    if rest.endswith('.py'):
        rest = rest[:-3]
    return namespace, '_'.join((namespace, rest))

def get_disaster_tags(disaster):
    """Build the list of warden tags for a disaster description.

    ``disaster`` is a mapping read with ``.get``: 'functionality_slug' and
    'category' (default 'boolean') are always turned into tags, while truthy
    'flow', 'ydt' and 'bydt' values each add one extra tag, in that order.
    """
    optional_tags = (
        ('flow', 'warden_alert_start_flow'),
        ('ydt', 'warden_alert_account_metric'),
        ('bydt', 'warden_alert_account_background_metric'),
    )
    tags = [
        'warden_auto_source',
        'warden_alert_create_spi',
        'warden_functionality_%s' % disaster.get('functionality_slug'),
        'warden_alert_category_' + disaster.get('category', 'boolean'),
    ]
    tags.extend(tag for key, tag in optional_tags if disaster.get(key))
    return tags

