#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import os
import urllib2
import re
import MySQLdb
import json
import direct_juggler.juggler as dj
import yt.wrapper as yt
import argparse
from functools import cmp_to_key
import multiprocessing
from multiprocessing.pool import ThreadPool

sys.path.insert(0, '/opt/check-db-availability')
import check_db_utils

# NOTE(review): API_URL and CONDUCTOR_GROUP are not referenced by the code visible in this file.
API_URL = "http://c.yandex-team.ru/api"
CONDUCTOR_GROUP = 'direct_ng_databases_mysql_ppcdata_ppcdata'
# MySQL account used for every query issued by this script.
MYSQL_USER = 'direct-ro'
# JSON file whose 'instances' section describes the database instances to check.
ALLDB_CONFIG_PATH = '/etc/yandex-direct/alldb-config.json'
# Description attached to CRIT juggler events produced by a failed consistency check.
CRIT_DESCRIPTION = 'Посмотреть подробный отчет можно запустив скрипт mysql_gtid_consistency.py с параметром -v'
# Juggler service name template; formatted with the instance name.
SERVICE_NAME = 'gtid_consistency.%s'
# File holding the password of MYSQL_USER.
PASSWORD_FILE = "/etc/direct-tokens/mysql_direct-ro"
# Used both as the MySQL connect timeout and as the wait limit for a query result.
TIMEOUT = 3
# Instance name -> list of ('b2yt_<cluster>', gtid_set); filled by read_b2yt_gtids().
B2YT_GTIDS = {}
# Instance name -> list of ('m2yt_<cluster>', gtid_set); filled by read_m2yt_gtids().
M2YT_GTIDS = {}

# Token file used by the YT client for all YT requests made below.
yt.config["token_path"] = '/etc/direct-tokens/yt_robot-direct-yt-ro'


def execute_db_query(host, port, password, user, query):
    """Run a single query against a MySQL server and return all rows.

    Args:
        host: MySQL host name.
        port: MySQL port.
        password: password of ``user``.
        user: MySQL account name.
        query: SQL text to execute.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the query.

    Raises:
        Any exception raised by MySQLdb while connecting or querying.
    """
    db = MySQLdb.connect(host=host, port=port, user=user, passwd=password, charset='utf8', connect_timeout=TIMEOUT)
    try:
        cursor = db.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()


def get_data(host, port, password, user, query):
    """Run ``query`` in a helper thread, waiting at most TIMEOUT seconds.

    Returns:
        The rows produced by execute_db_query, or None when the query
        failed or did not finish in time.
    """
    worker = ThreadPool(1)
    call_args = (host, port, password, user, query)
    pending = worker.apply_async(execute_db_query, call_args)

    try:
        rows = pending.get(timeout=TIMEOUT)
    except Exception:
        # Any failure (including a timeout) is reported to the caller as "no data".
        rows = None

    worker.close()
    return rows


def get_gtid(host, port, password, user):
    """Return the fifth column of the first ``show master status`` row.

    Returns None when get_data could not obtain a result.
    """
    rows = get_data(host, port, password, user, "show master status")
    if rows is None:
        return None

    first_row = rows[0]
    return first_row[4]


def get_server_uuid(host, port, password, user):
    """Return the ``@@server_uuid`` value of a MySQL server.

    Returns None when get_data could not obtain a result.
    """
    rows = get_data(host, port, password, user, "select @@server_uuid")
    if rows is None:
        return None

    first_row = rows[0]
    return first_row[0]


def parse_options():
    """Parse the command line and return the options as a plain dict.

    Returns:
        dict with the boolean keys ``verbose`` and ``juggler``.
    """
    # (short flag, long flag, destination, help text) for every boolean switch.
    switches = (
        ('-v', '--verbose', "verbose", "вывести результат на экран и не отправлять в juggler"),
        ('-j', '--juggler', "juggler", "отправить рузультат в juggler"),
    )

    arg_parser = argparse.ArgumentParser(
        description='Наблюдает за транзакциями на ppcdata и отправляет рузультат в juggler',
        epilog="Пример использования:\n\t %(prog)s --verbose --juggler",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for short_flag, long_flag, dest, help_text in switches:
        arg_parser.add_argument(short_flag, long_flag, action='store_true', dest=dest, help=help_text)

    namespace = arg_parser.parse_args()
    return vars(namespace)


def find_errors(gtids, master_uuid, uuid2host):
    """Compare the GTID sets reported by several sources and describe differences.

    Args:
        gtids: list of (source name, gtid set string) pairs.
        master_uuid: server UUID whose positions are excluded from the comparison.
        uuid2host: mapping of server UUID to host name, used only for the report.

    Returns:
        (has_errors, log): ``has_errors`` is True when an interval list contains
        a comma or when the sources disagree on the interval of some UUID other
        than ``master_uuid``; ``log`` is the human-readable report.
    """
    has_errors = False
    log_parts = []
    # server UUID -> interval string -> names of the sources reporting that interval
    gtid_dict = {}

    for entry in gtids:
        source, gtid_set = entry[0], entry[1]
        # Put every "<name>:" element on its own line so it can be matched line by line.
        # NOTE(review): MySQL separates several intervals of one UUID with ':'
        # (uuid:1-3:5-7); such input is split at every ':' here -- confirm this is intended.
        gtid_formatted = re.sub(r'([^:,\n]+:)', r'\n\1', gtid_set.replace("\n", ''))
        for raw_name, interval in re.findall(r'^([^:]+):(.+)$', gtid_formatted, re.M):
            gtid_name = raw_name.strip()

            if interval[-1] == ',':
                interval = interval[0:-1]

            if ',' in interval:
                has_errors = True
                log_parts.append(u'присутствуют разрывы %s:%s\n' % (gtid_name, interval))
                continue

            # Normalize a single transaction number N to the interval N-N.
            if '-' not in interval:
                interval = "%s-%s" % (interval, interval)

            gtid_dict.setdefault(gtid_name, {}).setdefault(interval, []).append(source)

    # A UUID is inconsistent when the sources report different intervals for it
    # or when not every source reported it at all.
    expected_sources = len(gtids)
    bad_keys = sorted(
        key for key, intervals in gtid_dict.items()
        if key != master_uuid
        and (len(intervals) > 1 or len(next(iter(intervals.values()))) != expected_sources)
    )

    if bad_keys:
        has_errors = True
        log_parts.append(u'списки позиций транзакций отличаются\n\n')
        known_hosts = sorted(uuid2host.values())

        for key in bad_keys:
            log_parts.append(u'%s (%s):\n' % (key, uuid2host.get(key, u'этого UUID нет среди текущих')))
            used_hosts = set()

            for interval in sorted(gtid_dict[key]):
                for host in sorted(gtid_dict[key][interval]):
                    log_parts.append('(%s) %s\n' % (host, interval))
                    used_hosts.add(host)

            # List the known hosts that reported nothing usable for this UUID.
            for host in known_hosts:
                if host not in used_hosts:
                    log_parts.append('(%s) -\n' % host)

            log_parts.append('\n')

    if not log_parts:
        return has_errors, u'отличий нет\n'

    return has_errors, u''.join(log_parts)


def check_instance(args):
    """Check GTID consistency of one database instance.

    Args:
        args: tuple ``(instance, alldb_config, db_config, options, password)``;
            packed into one argument so the function can be used with Pool.map.

    Returns:
        ``(instance, is_CRIT, desc)``: the instance name, whether a problem was
        found and the textual report. When ``options['juggler']`` is set, the
        outcome is also queued as a juggler event.
    """
    instance, alldb_config, db_config, options, password = args
    is_CRIT = False
    desc = ""

    try:
        instance_config = alldb_config[instance]
        master_host = check_db_utils.get_db_config_data(db_config, instance_config['db_config_master_node'])
        master_port = check_db_utils.get_db_config_data(db_config, instance_config['db_config_port_node'])
        replicas = instance_config['replicas']
        default_port = instance_config['mysql_port']
        skip_binlogbroker = instance_config.get('skip_binlogbroker_gtid_consistency_check', False)
        skip_b2yt = instance_config.get('skip_b2yt_gtid_consistency_check', False)
        skip_m2yt = instance_config.get('skip_m2yt_gtid_consistency_check', False)

        gtids = [(
            replica['host'],
            get_gtid(replica['host'], replica.get('mysql_port', default_port), password, MYSQL_USER)
        ) for replica in replicas]

        is_ppcdata = instance.startswith('ppcdata')
        if is_ppcdata:
            if skip_b2yt:
                desc += u'пропущена проверка b2yt по настройке из alldb-config\n'
            else:
                # No entry means YT returned nothing for this instance; YT
                # unavailability must not turn into a CRIT of this check.
                gtids.extend(B2YT_GTIDS.get(instance, []))

            if skip_binlogbroker:
                desc += u'пропущена проверка binlogbroker по настройке из alldb-config\n'
            else:
                gtids.extend(get_binlogbroker_gtids(instance))

        if is_ppcdata or instance == 'ppcdict':
            if skip_m2yt:
                desc += u'пропущена проверка m2yt по настройке из alldb-config\n'
            else:
                gtids.extend(M2YT_GTIDS.get(instance, []))

        # Sources without a GTID set are reported and excluded from the comparison.
        not_alive_hosts = [host for host, gtid in gtids if not gtid]
        if not_alive_hosts:
            desc += u"не получилось достучаться до следующих хостов: %s\n" % ", ".join(not_alive_hosts)
            gtids = [el for el in gtids if el[1]]

        master_uuid = get_server_uuid(master_host, master_port, password, MYSQL_USER)
        if not master_uuid:
            master_uuid = ""

        uuid2host = {}
        for replica in replicas:
            # The master's UUID is already known; avoid a second query for it.
            uuid = (master_uuid
                    if replica['host'] == master_host
                    else get_server_uuid(replica['host'], replica.get('mysql_port', default_port), password, MYSQL_USER))

            if not uuid:
                continue

            uuid2host[uuid] = replica['host']

        res = find_errors(gtids, master_uuid, uuid2host)
        is_CRIT = res[0]
        desc += res[1]

        if options['juggler']:
            if is_CRIT:
                dj.queue_events([dict(service=SERVICE_NAME % instance, status='CRIT', description=CRIT_DESCRIPTION)])
            else:
                dj.queue_events([dict(service=SERVICE_NAME % instance, status='OK', description='OK')])

    except Exception as e:
        # Any unexpected failure is itself reported as a CRIT for this instance.
        is_CRIT = True
        exception_desc = "unexpected exception: %s %s" % (type(e), e)
        desc += exception_desc

        if options['juggler']:
            dj.queue_events([dict(service=SERVICE_NAME % instance, status='CRIT', description=exception_desc)])

    return (instance, is_CRIT, desc)


def setup_yt(cluster):
    """Point the YT client at ``cluster`` and apply short timeouts and retry limits."""
    yt.config.set_proxy(cluster)

    timeout_ms = 3 * 1000
    for option in ('request_timeout', 'heavy_request_timeout', 'proxy_ban_timeout'):
        yt.config['proxy'][option] = timeout_ms
    yt.config['proxy']['retries']['count'] = 1

    yt.config['read_retries']['enable'] = True
    yt.config['read_retries']['count'] = 1

    # Retries for dynamic table requests are tuned so that the whole script does
    # not run for too long (cron enforces a 55 second execution timeout).
    backoff_policy = {
        'policy': 'constant_time',
        'constant_time': timeout_ms
    }
    yt.config['dynamic_table_retries']['enable'] = True
    yt.config['dynamic_table_retries']['count'] = 2
    yt.config['dynamic_table_retries']['backoff'] = backoff_policy

def read_b2yt_gtids():
    """Fill B2YT_GTIDS with the GTID sets stored in the seneca YT clusters.

    For every readable cluster each row of the mysql-sync-states table is added
    to ``B2YT_GTIDS['ppcdata<N>']`` as ``('b2yt_<cluster>', gtid_set)``, where N
    is the number following the first ':' in the row's ``dbname``.
    """
    for cluster in ['seneca-man', 'seneca-sas', 'seneca-vla']:
        try:
            setup_yt(cluster)
            # Materialize the rows inside the try so that a failure while the
            # response is being consumed is handled here as well.
            # NOTE(review): assumes select_rows may yield rows lazily -- confirm.
            table_data = list(
                yt.select_rows('dbname, gtid_set from [//home/direct/mysql-sync/current/mysql-sync-states]')
            )
        except Exception:
            # This script must not fail when YT, replicas etc. are unavailable;
            # unavailability is tracked by other dedicated monitorings.
            table_data = []

        for row in table_data:
            ppcdata_name = 'ppcdata%d' % int(row['dbname'][row['dbname'].find(':') + 1:])
            B2YT_GTIDS.setdefault(ppcdata_name, []).append(('b2yt_' + cluster, row['gtid_set']))

def read_m2yt_gtids():
    """Fill M2YT_GTIDS with the GTID sets stored in the hahn and arnold YT clusters.

    For every readable cluster each row of the mysql-sync-states table is added
    as ``('m2yt_<cluster>', gtid_set)`` under the key ``'ppcdict'`` (when the
    row's ``dbname`` is exactly that) or ``'ppcdata<N>'``, where N is the number
    following the first ':' in ``dbname``.
    """
    for cluster in ['hahn', 'arnold']:
        try:
            setup_yt(cluster)
            # Materialize the rows inside the try so that a failure while the
            # response is being consumed is handled here as well.
            # NOTE(review): assumes select_rows may yield rows lazily -- confirm.
            table_data = list(
                yt.select_rows('dbname, gtid_set from [//home/direct/mysql-sync/current/mysql-sync-states]')
            )
        except Exception:
            # This script must not fail when YT, replicas etc. are unavailable;
            # unavailability is tracked by other dedicated monitorings.
            table_data = []

        for row in table_data:
            if row['dbname'] == 'ppcdict':
                dbname = 'ppcdict'
            else:
                dbname = 'ppcdata%d' % int(row['dbname'][row['dbname'].find(':') + 1:])

            M2YT_GTIDS.setdefault(dbname, []).append(('m2yt_' + cluster, row['gtid_set']))


def get_binlogbroker_gtids(ppcdata):
    """Read the binlogbroker GTID set of a ppcdata instance from the locke cluster.

    Args:
        ppcdata: instance name; everything after its first seven characters
            is substituted into the state node path.

    Returns:
        ``[('binlogbroker_locke', value)]`` on success, an empty list when the
        value could not be read.
    """
    setup_yt('locke')
    try:
        return [(
            'binlogbroker_locke',
            yt.get("//home/direct/binlogbroker/logbrokerwriter/production/production:ppc:%s-state/@counters/set" % (
                ppcdata[7:]
            ))
        )]
    except Exception:
        # Best effort: an unreadable state simply contributes nothing to the check.
        return []


def run():
    """Check every MySQL instance that has GTID enabled and report the outcome.

    Reads the password and the alldb config, collects the GTID sets stored in
    YT, checks all matching instances in a process pool and, depending on the
    command line options, prints a detailed report and/or a one-line summary.
    """
    options = parse_options()
    with open(PASSWORD_FILE, "r") as fh:
        password = fh.read().strip()
    with open(ALLDB_CONFIG_PATH, 'r') as fh:
        alldb_config = json.load(fh)['instances']

    db_configs = {}
    instances = []
    for instance in alldb_config:
        instance_config = alldb_config[instance]
        if instance_config['type'] != 'mysql' or not instance_config.get('has_gtid', False):
            continue

        # Several instances may share one db config file; load each file once.
        db_config_path = instance_config['db_config']
        if db_config_path not in db_configs:
            with open(db_config_path, 'r') as fh:
                db_configs[db_config_path] = json.load(fh)

        instances.append((instance, alldb_config, db_configs[db_config_path], options, password))

    # Must run before the pool is created so that workers see the collected data.
    read_b2yt_gtids()
    read_m2yt_gtids()

    results = []
    if instances:
        # A pool cannot be created with zero processes, hence the guard above.
        processes_pool = multiprocessing.Pool(processes=min(len(instances), multiprocessing.cpu_count()))
        results = processes_pool.map(check_instance, instances)
        processes_pool.close()
        processes_pool.join()

    def compare_results(x, y):
        # Two 'ppcdata*' names of different length are ordered by length
        # (ppcdata2 before ppcdata10); everything else is ordered as plain strings.
        if x[0][:7] != 'ppcdata' or y[0][:7] != 'ppcdata' or len(x[0]) == len(y[0]):
            return (x[0] > y[0]) - (x[0] < y[0])
        return len(x[0]) - len(y[0])

    sorted_results = sorted(results, key=cmp_to_key(compare_results))

    if options['verbose']:
        print("\n\n".join(["%s:\n%s" % (name, desc.strip()) for name, _, desc in sorted_results]))

    if options['verbose'] or not options['juggler']:
        print("-" * 20)
        print('есть проблемы!' if any(ins[1] for ins in sorted_results) else 'нет проблем')

    return


# Run the check only when the file is executed as a script, not when it is imported.
if __name__ == '__main__':
    run()
