#!/skynet/python/bin/python

import os
import csv
import time
import random
import socket
import logging
import argparse
import datetime
import threading
from itertools import chain
from cStringIO import StringIO

import pkg_resources
pkg_resources.require('requests')
pkg_resources.require('skynet-heartbeat-server-service')

import pymongo
import requests  # noqa

from ya.skynet.services.heartbeatserver.bulldozer import helper  # noqa


# queries to initialize DB:
#   on each host:
#       CREATE TABLE IF NOT EXISTS heartbeat.cqudp_tasks ON CLUSTER '{cluster}' (uuid FixedString(36), timestamp DateTime, fqdn String, method String, remote_hosts UInt32, accounting_user String) ENGINE = ReplicatedMergeTree('/table_cqudp_tasks', '{replica}') PARTITION BY toYYYYMM(timestamp) ORDER BY (uuid, timestamp)
#       CREATE TABLE IF NOT EXISTS heartbeat.cqudp_tasks_extra_props ON CLUSTER '{cluster}' (timestamp DateTime, uuid FixedString(36), property String, value String) ENGINE = ReplicatedMergeTree('/table_cqudp_tasks_extra_props', '{replica}') PARTITION BY toYYYYMM(timestamp) ORDER BY (uuid, timestamp, property)
#       CREATE TABLE IF NOT EXISTS heartbeat.cqudp_tasks_history ON CLUSTER '{cluster}' (month UInt32, method String, total_hosts UInt32, average_hosts UInt32, accounting_user String) ENGINE = ReplicatedMergeTree('/table_cqudp_tasks_history', '{replica}') PARTITION BY month ORDER BY month
#       CREATE TABLE IF NOT EXISTS heartbeat.cqudp_tasks_extra_props_history ON CLUSTER '{cluster}' (month UInt32, total UInt32, property String, value String) ENGINE = ReplicatedMergeTree('/table_cqudp_tasks_extra_props_history', '{replica}') PARTITION BY month ORDER BY month


def parse_hosts_from_url(database_uri):
    """Extract the list of host names from a ``clickhouse://`` URI.

    Everything after the ``clickhouse://`` prefix is split on commas and
    each piece is stripped of surrounding whitespace.

    Raises ``Exception`` if the URI does not start with ``clickhouse://``.
    """
    scheme = 'clickhouse://'
    if database_uri.startswith(scheme):
        host_part = database_uri[len(scheme):]
        return [piece.strip() for piece in host_part.split(',')]
    raise Exception("Invalid database URI")


def make_urls(hosts, db):
    """Build HTTPS query URLs for *hosts* and split them into two lists.

    A host goes into the first list when its name starts with the first
    three characters of this machine's FQDN (presumably a datacenter
    prefix -- verify against the host naming scheme), and into the second
    list otherwise.

    Returns a ``(main_hosts, secondary_hosts)`` tuple of URL lists.
    """
    local_prefix = socket.getfqdn()[:3]
    same_prefix, other_prefix = [], []
    for name in hosts:
        address = 'https://%(host)s/?database=%(db)s' % {'host': name, 'db': db}
        if name.startswith(local_prefix):
            same_prefix.append(address)
        else:
            other_prefix.append(address)
    return same_prefix, other_prefix


def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    ``--db`` and ``--address`` are mandatory; the batching thresholds
    default to 1000 reports and 60 seconds.
    """
    parser = argparse.ArgumentParser()
    option = parser.add_argument

    option('--db', required=True, help='db name')
    option('--address', required=True, help='clickhouse url')
    option(
        '-c', '--count-threshold',
        type=int,
        default=1000,
        help='count threshold for sending the batch',
    )
    option(
        '-t', '--time-threshold',
        type=int,
        default=60,
        help='time threshold for sending the batch',
    )
    option(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='increase logging level',
    )
    option(
        '--credentials',
        help='path to file with colon-separated clickhouse credentials',
    )

    return parser.parse_args()


class ReportCollection(object):
    """Buffer incoming task reports and ship them to ClickHouse in batches.

    Each report is split into a main row (``cqudp_tasks``) and zero or more
    property rows (``cqudp_tasks_extra_props``).  Rows are accumulated in two
    in-memory buffers and flushed over HTTP when either the count threshold
    or the time threshold is exceeded.

    Besides batching, the class can run two background loops:

    * ``watcher`` flushes a non-empty buffer once it gets older than the
      time threshold;
    * ``old_metrics_collector`` aggregates rows of past months into the
      ``*_history`` tables once a day and drops the aggregated partitions,
      guarded by a lock document stored in MongoDB.

    ``op_lock`` protects the buffers, the queue counter and ``last_time``.
    The CSV writers use the ``clickhouse`` dialect, which must be registered
    with the ``csv`` module before an instance is created.
    """

    # Name of the lock document in the MongoDB ``locks`` collection.
    LOCK_NAME = 'clickhouse_cleaner'
    # A lock held by another host is removed once it is older than this.
    STALE_LOCK_SECONDS = 600
    # Pause between two old-metrics collection passes.
    COLLECT_INTERVAL_SECONDS = 60 * 60 * 24
    # Report keys stored in cqudp_tasks; all other keys become properties.
    MAIN_FIELDS = ('uuid', 'fqdn', 'method', 'remote_hosts', 'accounting_user', 'timestamp')

    def __init__(self, hosts, db, count_threshold, time_threshold, user, password):
        """Set up empty buffers, ClickHouse URLs/credentials and the mongo lock collection.

        hosts            -- ClickHouse host names
        db               -- ClickHouse database name put into every URL
        count_threshold  -- flush when more than this many reports are queued
        time_threshold   -- flush when the last flush is older than this (seconds)
        user, password   -- sent as X-ClickHouse-User / X-ClickHouse-Key headers
        """
        self.op_lock = threading.Lock()
        self.queue = 0
        self.reports, self.subreports = None, None
        self.reinit_queue()
        self.primary_urls, self.secondary_urls = make_urls(hosts, db)
        self.auth = {
            'X-ClickHouse-User': user,
            'X-ClickHouse-Key': password,
        }
        self.count_threshold = count_threshold
        self.time_threshold = time_threshold
        self.last_time = time.time()

        self.mongo = helper.ReportDatabase()
        self.mongo_locks = self.mongo().locks
        # A unique index on the lock name makes a second insert fail, which
        # is what _lock() relies on.
        self.mongo_locks.ensure_index('name', unique=True)

    def reinit_queue(self):
        """Swap in fresh empty buffers and reset the queue counter.

        Returns the previous ``(queue, reports, subreports)`` triple; the
        buffers are ``None`` on the very first call.
        """
        new_reports, new_subreports = StringIO(), StringIO()
        new_reports_writer = csv.writer(new_reports, 'clickhouse')
        new_subreports_writer = csv.writer(new_subreports, 'clickhouse')

        reports, self.reports, self.reports_writer = self.reports, new_reports, new_reports_writer
        subreports, self.subreports, self.subreports_writer = self.subreports, new_subreports, new_subreports_writer
        queue, self.queue = self.queue, 0

        return queue, reports, subreports

    def watcher(self):
        """Loop forever, flushing the buffer when it is non-empty and older than the time threshold."""
        while True:
            time.sleep(self.time_threshold)
            with self.op_lock:
                if self.queue and time.time() - self.last_time > self.time_threshold:
                    self.send_report()
                    self.last_time = time.time()

    def _lock(self, hostname):
        """Try to take (or refresh) the cleaner lock for *hostname*.

        Returns True when the lock is held by *hostname* after the call.
        When another host holds it, returns False; a lock older than
        ``STALE_LOCK_SECONDS`` is removed so that a later attempt can
        succeed.
        """
        try:
            self.mongo_locks.insert({'host': hostname, 'name': self.LOCK_NAME, 'ts': time.time()})
            logging.debug('clickhouse lock: acquired first time')
            return True
        except Exception:
            # Presumably a duplicate-key error from the unique index on
            # 'name'; fall through and inspect the existing lock document.
            pass

        data = self.mongo_locks.find_one({'name': self.LOCK_NAME})
        if data is None:
            # The lock disappeared between the failed insert and the lookup;
            # report failure and let the next pass retry.
            logging.debug('clickhouse lock: not found after failed insert')
            return False

        holder = data.get('host', None)
        if holder == hostname:
            self.mongo_locks.update(
                {'host': hostname, 'name': self.LOCK_NAME},
                {'host': hostname, 'name': self.LOCK_NAME, 'ts': time.time()}
            )
            logging.debug('clickhouse lock: held by us')
            return True

        age = time.time() - data.get('ts', 0)
        logging.debug('clickhouse lock: held by %s for %ds', holder, age)
        if age > self.STALE_LOCK_SECONDS:
            self.mongo_locks.remove({'host': holder, 'name': self.LOCK_NAME})

        return False

    def _unlock(self, hostname):
        """Release the cleaner lock held by *hostname*; failures are logged, never raised."""
        try:
            self.mongo_locks.remove({'host': hostname, 'name': self.LOCK_NAME})
        except Exception:
            # Best effort: a lock left behind is removed by another host
            # once it becomes stale.
            logging.exception('failed to release clickhouse lock')

    def _post(self, url, query, data=None):
        """POST *query* (with optional body *data*) to *url*; raise on transport or HTTP error."""
        req = requests.post(
            url=url,
            headers=self.auth,
            params={'query': query},
            data=data,
            verify=False,
        )
        req.raise_for_status()

    def _aggregate_and_drop(self, url, table, insert_query, partitions):
        """Run *insert_query*, then drop every partition in *partitions* from *table*.

        Nothing is dropped when the aggregating insert fails.  A failure to
        drop one partition is logged and the remaining ones are still tried.
        """
        try:
            self._post(url, insert_query)
        except Exception:
            logging.exception('failed to insert aggregates from %s', table)
            return

        for partition in partitions:
            drop_query = "ALTER TABLE %s ON CLUSTER '{cluster}' DROP PARTITION %s" % (table, partition)
            try:
                self._post(url, drop_query)
            except Exception:
                logging.exception('failed to drop partition %s from %s', partition, table)

    def _collect_old_tasks(self, url, date, partitions):
        """Aggregate cqudp_tasks rows older than month *date* (YYYYMM) into the history table and drop *partitions*."""
        query = (
            'INSERT INTO cqudp_tasks_history SELECT '
            'toYYYYMM(timestamp) AS month, method, '
            'sum(remote_hosts) AS total_hosts, '
            'avg(remote_hosts) AS average_hosts, '
            'accounting_user '
            'FROM cqudp_tasks WHERE month < %s '
            'GROUP BY month, method, accounting_user'
        ) % (date,)

        self._aggregate_and_drop(url, 'cqudp_tasks', query, partitions)

    def _collect_old_props(self, url, date, partitions):
        """Aggregate cqudp_tasks_extra_props rows older than month *date* (YYYYMM) and drop *partitions*.

        Values of the 'object' property are normalized by a chain of regexp
        replacements (memory addresses, evlogdump arguments, instance paths,
        timeouts) before grouping.
        """
        query = (
            'INSERT INTO cqudp_tasks_extra_props_history SELECT '
            'toYYYYMM(timestamp) AS month, count(*) AS total, '
            "property, if(property == 'object', replaceRegexpAll("
            "   replaceRegexpAll("
            "       replaceRegexpAll("
            "           replaceRegexpAll(value, ' at 0x[0-9a-f]+', ''),"
            "           'evlogdump -o -s [0-9]+ -e [0-9]+',"
            "           'evlogdump -o -s <XXXXX> -e <YYYYY>'"
            "       ),"
            "       '^(/place|/ssd)+/db/iss3/instances/[0-9a-zA-Z_-]+/evlogdump -o -s [^ ]+ -e [^ ]+ [^ ]+',"
            "       '\\1/db/iss3/instances/<IIIII>/evlogdump ...'"
            "   ),"
            "   'with timeout [0-9]+',"
            "   'with timeout XXX'"
            "), value) as value "
            'FROM cqudp_tasks_extra_props WHERE month < %s '
            'GROUP BY month, property, value'
        ) % (date,)

        self._aggregate_and_drop(url, 'cqudp_tasks_extra_props', query, partitions)

    def _collect_old_metrics(self):
        """Run one aggregation pass over the partitions of past months."""
        # Fall back to the secondary hosts when no host shares our prefix,
        # instead of crashing on random.choice([]).
        candidates = self.primary_urls or self.secondary_urls
        if not candidates:
            logging.error('no clickhouse hosts available, skipping old metrics collection')
            return

        url = random.choice(candidates)
        date = datetime.date.today().strftime('%Y%m')

        query = (
            'SELECT DISTINCT toYYYYMM(timestamp) AS month '
            'FROM cqudp_tasks WHERE month < %s'
        ) % (date,)

        try:
            req = requests.get(
                url=url,
                headers=self.auth,
                params={'query': query},
                verify=False,
            )
            req.raise_for_status()
        except Exception:
            logging.exception('failed to get list of old partitions from cqudp_tasks')
            return

        # An empty response must yield an empty list, not [''] -- otherwise
        # we would try to drop a partition with an empty name.
        partitions = [line.strip() for line in req.text.splitlines() if line.strip()]
        if not partitions:
            logging.debug('no old partitions to collect')
            return

        self._collect_old_tasks(url, date, partitions)
        self._collect_old_props(url, date, partitions)

    def old_metrics_collector(self):
        """Loop forever: once a day take the cleaner lock and aggregate old partitions."""
        hostname = socket.gethostname()

        while True:
            time.sleep(self.COLLECT_INTERVAL_SECONDS)

            if not self._lock(hostname):
                continue

            try:
                self._collect_old_metrics()
            finally:
                self._unlock(hostname)

    def add(self, report):
        """Queue one report and flush the batch if a threshold is exceeded.

        *report* is a mapping that must contain the keys listed in
        ``MAIN_FIELDS``; every other key is stored as a property row with
        its value converted via ``str()``.  The mapping is not modified.

        Raises KeyError for a missing main field and whatever
        ``int(report['timestamp'])`` raises for a bad timestamp; in both
        cases nothing is written to the buffers.
        """
        uuid = report['uuid']
        timestamp = int(report['timestamp'])
        main_row = (
            uuid, timestamp, report['fqdn'], report['method'],
            report['remote_hosts'], report['accounting_user'],
        )
        # Build all rows up front so that a bad report cannot leave a
        # half-written entry behind.
        extra_rows = [
            (uuid, timestamp, key, str(report[key]))
            for key in report
            if key not in self.MAIN_FIELDS
        ]

        # The writers are swapped by reinit_queue() under op_lock (also from
        # the watcher thread), so rows must be written under the same lock.
        with self.op_lock:
            self.reports_writer.writerow(main_row)
            self.subreports_writer.writerows(extra_rows)
            self.queue += 1
            if self.queue > self.count_threshold or time.time() - self.last_time > self.time_threshold:
                self.send_report()
                self.last_time = time.time()

    def make_query(self):
        """Detach the current buffers and turn them into INSERT statements.

        Returns ``(tasks_query, tasks_data, props_query, props_data)``.
        All four are None when there are no task rows; the last two are
        None when there are no property rows.  The buffers are reset as a
        side effect.
        """
        _, reports, subreports = self.reinit_queue()

        # Buffer objects are always truthy, so emptiness has to be checked
        # on their contents.
        reports1 = reports.getvalue() if reports is not None else ''
        if not reports1:
            return None, None, None, None

        header1 = "INSERT INTO cqudp_tasks (uuid, timestamp, fqdn, method, remote_hosts, accounting_user) FORMAT TabSeparated"

        reports2 = subreports.getvalue() if subreports is not None else ''
        if not reports2:
            return header1, reports1, None, None

        header2 = "INSERT INTO cqudp_tasks_extra_props (uuid, timestamp, property, value) FORMAT TabSeparated"

        return header1, reports1, header2, reports2

    def _send_with_failover(self, urls, query, data):
        """POST *data* to the first URL in *urls* that accepts it.

        Returns True on success and False when every URL failed.
        """
        for url in urls:
            try:
                self._post(url, query, data)
            except Exception:
                logging.exception("failed to send report to %r", url)
            else:
                logging.debug("sent report to %r", url)
                return True

        logging.error("failed to send %d bytes to any clickhouse host", len(data))
        return False

    def send_report(self):
        """Flush the buffered rows to ClickHouse.

        Hosts are tried in random order, primary hosts before secondary
        ones.  Task rows and property rows are sent independently, each
        with the full list of hosts.  Callers are expected to hold
        ``op_lock``.
        """
        query1, data1, query2, data2 = self.make_query()
        if not query1:
            return

        # A real list (not a one-shot iterator): both inserts below must be
        # able to walk every host from the start.
        urls = (
            random.sample(self.primary_urls, len(self.primary_urls)) +
            random.sample(self.secondary_urls, len(self.secondary_urls))
        )

        logging.info("sending tasks: %d bytes", len(data1))
        self._send_with_failover(urls, query1, data1)

        if query2:
            logging.info("sending task props: %d bytes", len(data2))
            self._send_with_failover(urls, query2, data2)


def main():
    """Entry point: read reports from the communicator and feed them to ClickHouse.

    Credentials are taken from the ``--credentials`` file (``user:password``)
    or, when the option is absent, from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables.  Two daemon threads are
    started: one flushing stale batches, one aggregating old partitions.
    """
    pymongo.pool.socket = socket  # remove gevent monkeypatching for pymongo to work in thread

    args = parse_args()
    hosts = parse_hosts_from_url(args.address)

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    if args.credentials:
        # Close the file deterministically instead of leaking the handle.
        with open(args.credentials) as credentials_file:
            user, password = credentials_file.read().strip().split(':', 1)
    else:
        user = os.environ['CLICKHOUSE_USER']
        password = os.environ['CLICKHOUSE_PASSWORD']

    # The dialect must be registered before ReportCollection creates its
    # csv writers.
    csv.register_dialect('clickhouse', delimiter='\t', lineterminator='\n', doublequote=True, quoting=csv.QUOTE_NONE, escapechar='\\')

    com = helper.Communicator().ready()
    collection = ReportCollection(hosts, args.db, args.count_threshold, args.time_threshold, user, password)

    t = threading.Thread(target=collection.watcher)
    t.daemon = True
    t.start()

    t2 = threading.Thread(target=collection.old_metrics_collector)
    t2.daemon = True
    t2.start()

    for host, _, data in com.read():
        try:
            logging.debug("got report from %s: %s", host, data['report'])
            collection.add(data['report'])
            com.ready()
        except (KeyError, TypeError, ValueError) as ex:
            # Malformed report: reject it with the error text.
            com.discard(repr(ex))
