#!/usr/bin/env python3

import argparse
import pickle
import os
import subprocess
import sys

from datetime import datetime, timedelta


# ClickHouse hosts to query, keyed by the datacenter prefix that main()
# extracts from the switch name. Every host listed for a prefix is queried.
clickhouses = {
    'sas': ['yp-netmon-clickhouse-noc-sla-sas-11.sas.yp-c.yandex.net',
            'yp-netmon-clickhouse-noc-sla-sas-12.sas.yp-c.yandex.net'],
    'vla': ['yp-netmon-clickhouse-noc-sla-1.vla.yp-c.yandex.net',
            'yp-netmon-clickhouse-noc-sla-6.vla.yp-c.yandex.net',
            'yp-netmon-clickhouse-noc-sla-10.vla.yp-c.yandex.net',
            'yp-netmon-clickhouse-noc-sla-11.vla.yp-c.yandex.net'],
    # NOTE(review): the former 'man' hosts are kept commented out below and
    # 'man' currently resolves to the iva host -- presumably a temporary
    # redirect; confirm before re-enabling or deleting.
#   'man': ['yp-netmon-clickhouse-noc-sla-man-5.man.yp-c.yandex.net',
#           'yp-netmon-clickhouse-noc-sla-man-9.man.yp-c.yandex.net'],
    'man': ['yp-netmon-clickhouse-noc-sla-iva-1.iva.yp-c.yandex.net'],
    'myt': ['yp-netmon-clickhouse-noc-sla-myt-2.myt.yp-c.yandex.net'],
    'iva': ['yp-netmon-clickhouse-noc-sla-iva-1.iva.yp-c.yandex.net'],
}
# 'vlx' is an alias for 'vla': both keys share the same host list object.
clickhouses['vlx'] = clickhouses['vla']

# Directory part of the path the script was invoked with (sys.argv[0]); it may
# be relative or empty, in which case the paths below are relative to the cwd.
BASE_DIR = os.path.dirname(sys.argv[0])
# Executable used to resolve a switch name into its numeric id.
TOPOLOGY_FINDER_PATH = os.path.join(BASE_DIR, '../topology/topology-finder')
# Pickle file holding the switch name -> id cache; load_cache()/save_cache()
# treat a value of None as "caching disabled".
CACHE_PATH = os.path.join(BASE_DIR, '.check_hosts_in_switch_cache')


def load_cache():
    """Load the switch name -> switch id cache from CACHE_PATH.

    The cache is best-effort: problems are reported on stderr and never
    raised, so a missing or damaged cache only costs an extra lookup.

    Returns:
        dict: the cached mapping, or an empty dict when caching is disabled
        (CACHE_PATH is None), the file does not exist, the file cannot be
        unpickled, or the unpickled object is not a dict.
    """
    if CACHE_PATH is None:
        return {}

    try:
        with open(CACHE_PATH, 'rb') as f:
            # NOTE: unpickling executes code embedded in a crafted file, so
            # CACHE_PATH must only ever point at a trusted location.
            loaded = pickle.load(f)
    except FileNotFoundError:
        # No cache has been written yet; this is the normal first-run case.
        return {}
    except Exception as ex:
        print('Failed to load switch id cache: ', ex, file=sys.stderr)
        return {}

    if not isinstance(loaded, dict):
        # A readable pickle of the wrong shape would break the `in` checks and
        # item assignment done by callers, so treat it like a corrupt cache.
        print('Failed to load switch id cache: ',
              'unexpected content of type ' + type(loaded).__name__,
              file=sys.stderr)
        return {}

    return loaded


def save_cache(cache):
    """Persist the switch id cache to CACHE_PATH using pickle.

    Does nothing when CACHE_PATH is None. Any failure while opening or
    writing the file is reported on stderr and otherwise ignored.

    Args:
        cache: the object to pickle (the switch name -> id mapping).
    """
    caching_enabled = CACHE_PATH is not None
    if caching_enabled:
        try:
            with open(CACHE_PATH, 'wb') as cache_file:
                pickle.dump(cache, cache_file)
        except Exception as error:
            # Best-effort: losing the cache must not abort the run.
            print('Failed to save switch id cache: ', error, file=sys.stderr)


def get_switch_id_by_name(name):
    """Resolve a switch name to its numeric id, consulting the cache first.

    On a cache miss the topology-finder executable is run with
    `--switch-name <name>`; the last whitespace-separated token of its
    stdout is taken as the id, stored in the cache and saved.

    Args:
        name: switch name to look up.

    Returns:
        The cached value for `name`, or the freshly resolved int id.

    Raises:
        RuntimeError: topology-finder reported that the switch is not found.
        subprocess.CalledProcessError: topology-finder exited with non-zero
            status (check=True).
    """
    known_ids = load_cache()
    if name in known_ids:
        return known_ids[name]

    finder = subprocess.run(
        [TOPOLOGY_FINDER_PATH, '--switch-name', name],
        stdin=subprocess.DEVNULL, stdout=subprocess.PIPE,
        check=True
    )
    output = finder.stdout

    not_found_message = 'Switch ' + name + ': not found'
    if output.strip().decode() == not_found_message:
        raise RuntimeError('switch ' + name + ' not found in topology')

    # The id is the trailing token of the tool's output.
    tokens = output.split()
    known_ids[name] = int(tokens[-1])
    save_cache(known_ids)
    return known_ids[name]


def generate_table_name():
    """Return the default table name: 'probes_' + YYYYMMDDHH of one hour ago.

    The timestamp is taken from the naive local clock (datetime.now()).
    """
    one_hour_ago = datetime.now() - timedelta(minutes=60)
    return one_hour_ago.strftime('probes_%Y%m%d%H')


def generate_datetime_param(table_name, minutes):
    """Build a toDateTime('...') SQL expression relative to a table's hour.

    Args:
        table_name: table name whose characters 7..16 hold the timestamp as
            YYYYMMDDHH (the layout generate_table_name() produces, e.g.
            'probes_2020010203').
        minutes: integer offset in minutes from the start of that hour.
            Offsets outside 0..59 roll over into neighbouring hours/days
            instead of yielding an invalid literal such as '03:60:00'.

    Returns:
        str: e.g. "toDateTime('2020-01-02 03:05:00')".
    """
    stamp = table_name[7:17]
    if len(stamp) == 10 and all(ch in '0123456789' for ch in stamp):
        try:
            moment = (datetime.strptime(stamp, '%Y%m%d%H')
                      + timedelta(minutes=minutes))
        except (ValueError, OverflowError):
            # Not a real calendar date or offset out of range: fall through
            # to the literal formatting below.
            moment = None
        if moment is not None:
            # Explicit zero-padded fields keep the output identical to the
            # literal formatting for every in-range offset.
            return "toDateTime('{:04d}-{:02d}-{:02d} {:02d}:{:02d}:00')".format(
                moment.year, moment.month, moment.day,
                moment.hour, moment.minute)

    # The name does not carry a parsable YYYYMMDDHH stamp: keep the historical
    # behaviour of slicing the fields out verbatim.
    y, m, d, h = table_name[7:11], table_name[11:13], table_name[13:15], table_name[15:17]
    return "toDateTime('{}-{}-{} {}:{:02d}:00')".format(y, m, d, h, minutes)


def _quote_for_display(arg):
    """Wrap `arg` in double quotes when it contains a space (display only)."""
    return '"' + arg + '"' if ' ' in arg else arg


def main():
    """Command-line entry point: print per-host probe metrics for one switch.

    Parses the command line, resolves the switch name to an id, then runs
    the same aggregation query through `clickhouse-client` on every
    ClickHouse host configured for the switch's datacenter prefix, printing
    each command (with credentials masked) and its output.

    Credentials are read from the NETMON_CLICKHOUSE_USER and
    NETMON_CLICKHOUSE_PASSWORD environment variables; the process exits with
    status 1 when either is missing or when the datacenter prefix of the
    switch name is not configured in `clickhouses`.

    If no host returns any rows, a hint is printed to stderr and the switch
    is dropped from the id cache so a wrong id is not kept.
    """
    parser = argparse.ArgumentParser(
        description='Show connectivity metrics of hosts in a single switch.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('-d', dest='direction', default='tx', choices=['tx', 'rx'], help='Probe direction (default: %(default)s).')
    parser.add_argument('--rtt', action='store_true', help='Analyze rtt instead of connectivity')
    parser.add_argument('-t', dest='time', nargs=2, metavar=('FROM', 'UNTIL'),
                        default=argparse.SUPPRESS, type=int,
                        help='Time range (start and end in minutes)')
    parser.add_argument('switch', help='Switch name')
    parser.add_argument('table', nargs='?', default=generate_table_name(),
                        help='Clickhouse table name')
    # Parse before checking credentials so that --help and usage errors work
    # without the environment being set up.
    args = parser.parse_args()

    try:
        clickhouse_user = os.environ['NETMON_CLICKHOUSE_USER']
        clickhouse_password = os.environ['NETMON_CLICKHOUSE_PASSWORD']
    except KeyError:
        print('Error: NETMON_CLICKHOUSE_USER and NETMON_CLICKHOUSE_PASSWORD env vars are not set.',
              file=sys.stderr)
        sys.exit(1)

    switch_name = args.switch
    table_name = args.table
    # `-t` uses default=SUPPRESS, so the attribute is absent when not given.
    time_from, time_to = getattr(args, 'time', (None, None))

    switch_id = get_switch_id_by_name(switch_name)

    print('Checking connectivity for switch id {} in {}'.format(switch_id, table_name))

    time_range_condition = ''
    if time_from is not None:
        time_range_condition += ' and Inserted > ' + generate_datetime_param(table_name, time_from)
    if time_to is not None:
        time_range_condition += ' and Inserted < ' + generate_datetime_param(table_name, time_to)

    if args.rtt:
        select_columns = 'quantile(0.95)(Rtt), quantile(0.5)(Rtt), avg(Rtt), max(Rtt), quantiles(0.5, 0.9, 0.95, 0.99)(Rtt)'
        order_by = 'quantile(0.95)(Rtt) desc'
    else:
        select_columns = 'avg(Score)'
        order_by = 'avg(Score)'

    # 'tx' groups by the probe source, 'rx' by the probe target.
    endpoint = 'Source' if args.direction == 'tx' else 'Target'

    # The datacenter prefix is the first three characters of the switch name,
    # after an optional 'lab-' prefix.
    if switch_name.startswith('lab-'):
        dc = switch_name[4:7]
    else:
        dc = switch_name[:3]
    try:
        hosts = clickhouses[dc]
    except KeyError:
        print("Error: no ClickHouse hosts configured for datacenter prefix '{}' "
              "(known prefixes: {}).".format(dc, ', '.join(sorted(clickhouses))),
              file=sys.stderr)
        sys.exit(1)

    # The query does not depend on the host, so build it once.
    query = (
        'select {endpoint}Fqdn, count(), {select_columns} from {table_name}'
        ' where {endpoint}Switch = {switch_id} and Network = 6 and Protocol = 2 and Score != -1'
        '{time_range_condition}'
        ' group by {endpoint}Fqdn order by {order_by} format PrettyCompact'
        .format(endpoint=endpoint, select_columns=select_columns, table_name=table_name,
                switch_id=switch_id,
                time_range_condition=time_range_condition,
                order_by=order_by)
    )

    got_any_result = False
    for host in hosts:
        connection_args = ['clickhouse-client', '--host', host]
        query_args = ['--query', query]

        # NOTE(review): the password is passed as a command-line argument and
        # is therefore visible in the process list while the client runs.
        cmd = (connection_args
               + ['--user', clickhouse_user, '--password', clickhouse_password]
               + query_args)

        # Build the echoed command from placeholders rather than masking the
        # real values afterwards, so credentials containing spaces (which get
        # quoted) can never leak into the output.
        shown_cmd = (connection_args
                     + ['--user', '$NETMON_CLICKHOUSE_USER',
                        '--password', '$NETMON_CLICKHOUSE_PASSWORD']
                     + query_args)
        print(' '.join(_quote_for_display(arg) for arg in shown_cmd))

        text = subprocess.run(cmd, stdout=subprocess.PIPE, check=True).stdout
        # More than one output line is taken as "the query returned rows".
        if len(text.splitlines()) > 1:
            got_any_result = True
        print(text.decode())

    if not got_any_result:
        print('No probes found. '
              'Note: this may happen if topology-finder is built without -DNOC_SLA_BUILD=1.',
              file=sys.stderr)

        # don't pollute cache with incorrect switch ids
        cache = load_cache()
        # The entry may be absent (caching disabled or the earlier save
        # failed); a default avoids a KeyError in that case.
        cache.pop(switch_name, None)
        save_cache(cache)

# Run the command-line tool only when this file is executed as a script.
if __name__ == '__main__':
    main()
