#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
"""
Check the size of Distributed tables that should be quite small in normal
circumstances.
"""

import argparse
import logging
import subprocess
import sys
from urllib.parse import quote
import socket

import requests
from humanfriendly import parse_size, format_size


def main():
    """
    Program entry point.

    Parses the command line, configures logging (DEBUG when --debug is
    given, CRITICAL otherwise) and runs the check. Any exception raised
    along the way is logged and reported through die() with status 2.
    """
    try:
        args = parse_args()

        log_level = 'DEBUG' if args.debug else 'CRITICAL'
        logging.basicConfig(level=log_level)

        do_check(args)
    except Exception as e:
        # Log the traceback (only visible when the log level allows it) and
        # turn the failure into a status line.
        logging.exception('')
        die(2, 'Error: %r' % (e,))


def parse_args():
    """
    Parse command-line arguments.

    Returns the argparse namespace with the attributes debug, warn, crit,
    port, ssl and ca_bundle.
    """
    # (flags, keyword arguments) for every supported option, in the order
    # they are registered with the parser.
    option_specs = (
        (('-d', '--debug'),
         {'action': 'store_true', 'help': 'Enable debug output.'}),
        (('-w', '--warn'),
         {'default': '512MB', 'help': 'Warning threshold.'}),
        (('-c', '--crit'),
         {'default': '2048MB', 'help': 'Critical threshold.'}),
        (('-p', '--port'),
         {'type': int, 'help': 'ClickHouse HTTP(S) port.'}),
        (('-s', '--ssl'),
         {'action': 'store_true', 'help': 'Use HTTPS rather than HTTP.'}),
        (('--ca_bundle',),
         {'help': 'Path to CA bundle to use.'}),
    )

    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args()


def do_check(args):
    """
    Perform the check.

    Measures every Distributed table reported by the local ClickHouse
    server and compares its size with the warning and critical thresholds
    from args. The outcome is reported through die(): status 0 when no
    table reaches the warning threshold, otherwise 1 or 2 together with the
    list of offending tables.
    """
    warn_limit = parse_size(args.warn, binary=True)
    crit_limit = parse_size(args.crit, binary=True)

    hostname = socket.gethostname()
    # An explicit port wins; otherwise pick the default for the protocol.
    port = args.port or (8443 if args.ssl else 8123)

    worst = 0
    oversized = []
    tables = get_distributed_tables(hostname, port, args.ssl, args.ca_bundle)
    for table in tables:
        size = get_table_size(table)
        if size >= warn_limit:
            # Critical is sticky: a later warning never lowers the status.
            worst = 2 if size >= crit_limit else max(1, worst)
            oversized.append('{db}.{name} ({size})'.format(
                db=table['database'],
                name=table['name'],
                size=format_size(size, binary=True)))

    if worst == 0:
        die(0, 'OK')

    die(worst,
        "Found distributed tables with abnormal size: %s"
        % ', '.join(oversized))


def get_distributed_tables(host, port, ssl, ca_bundle):
    """
    Query ClickHouse over HTTP(S) for all tables with the Distributed engine.

    Returns the "data" member of the JSON response: a list of tables where
    each table is represented by dict with keys "name" and "database".
    Raises an HTTP error if the server answers with an error status.
    """
    scheme = 'https' if ssl else 'http'
    url = '{0}://{1}:{2}'.format(scheme, host, port)
    query = ("SELECT database, name FROM system.tables "
             "WHERE engine = 'Distributed' FORMAT JSON")

    response = requests.get(
        url,
        params={'query': query},
        timeout=5,
        # A CA bundle path takes precedence; otherwise certificate
        # verification is simply switched on or off with the ssl flag.
        verify=ca_bundle or ssl)
    response.raise_for_status()

    return response.json()['data']


def get_table_size(table):
    """
    Return the size in bytes of the table directory, as reported by
    "du -sb" run through sudo.
    """
    table_dir = get_table_path(table)
    du_output = run('sudo du -sb %s' % table_dir)
    # du prints "<bytes><whitespace><path>"; the first field is the size.
    size_field = du_output.split()[0]
    return int(size_field)


def _escape_for_file_name(value):
    """
    Encode a database or table name the way ClickHouse names directories.

    Only ASCII letters, digits and '_' are kept literally; every other byte
    of the UTF-8 encoded name becomes %XX with upper-case hex digits
    (e.g. '.inner.x' is stored as '%2Einner%2Ex'). urllib's quote() is not
    suitable here because it never escapes '-' and '.'.
    """
    escaped = []
    for byte in value.encode('utf-8'):
        char = chr(byte)
        if byte < 0x80 and (char.isalnum() or char == '_'):
            escaped.append(char)
        else:
            escaped.append('%{0:02X}'.format(byte))
    return ''.join(escaped)


def get_table_path(table):
    """
    Return path to table directory on file system.

    table is a dict with keys "database" and "name". Both components are
    escaped with the ClickHouse file-name rules, so names containing
    characters such as '-' or '.' resolve to the directory that actually
    exists on disk.
    """
    return '/var/lib/clickhouse/data/{db}/{name}'.format(
        db=_escape_for_file_name(table['database']),
        name=_escape_for_file_name(table['name']))


def run(cmd):
    """
    Run the command and return its standard output decoded to text.

    cmd is either a command line given as a string, which is executed
    through the shell, or a sequence of program arguments, which is
    executed directly without a shell (no quoting issues, no shell
    injection).

    Raises subprocess.CalledProcessError if the command exits with a
    non-zero status.
    """
    # Only plain strings need shell parsing; argument lists bypass the shell.
    use_shell = isinstance(cmd, str)
    result = subprocess.run(cmd, shell=use_shell, check=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return result.stdout.decode()


def die(status, message):
    """
    Print the check result as a single "status;message" line and terminate.

    The process exit code is always 0; the check status is carried only by
    the printed line.
    """
    print(status, message, sep=';')
    sys.exit(0)


# Run the check only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
