from datetime import datetime
import calendar
import time
import logging
import random
import requests
import json

import pkg_resources

pkg_resources.require('skynet-heartbeat-server-service')
pkg_resources.require('requests')

from ya.skynet.services.heartbeatserver.bulldozer import helper

from common import InstanceTypes


def parse_hosts_from_url(database_uri):
    """ Extract the list of hosts from a ``clickhouse://host1,host2,...`` URI.

        :param database_uri: string that must start with ``clickhouse://``; everything after
            the prefix is treated as a comma-separated list of hosts.
        :return: list of hosts with surrounding whitespace stripped; blank entries
            (e.g. produced by a trailing comma) are skipped.
        :raises ValueError: if the URI does not start with ``clickhouse://``.
    """
    clickhouse_prefix = 'clickhouse://'
    if not database_uri.startswith(clickhouse_prefix):
        # ValueError is a subclass of Exception, so pre-existing `except Exception` handlers keep working
        raise ValueError("Invalid database URI")

    stripped_hosts = (host.strip() for host in database_uri[len(clickhouse_prefix):].split(','))
    # A blank host can never be connected to, so there is no point in returning it
    return [host for host in stripped_hosts if host]


def get_ch_formatted_date_from_timestamp(ts):
    """ Render a unix timestamp as a ``YYYY-MM-DD`` date in the local timezone of the process
    """
    # XXX: No pytz on heartbeat servers :( :(
    # so the conversion relies on the local timezone, which is hopefully Moscow
    local_moment = datetime.fromtimestamp(ts)
    return local_moment.strftime('%Y-%m-%d')


def assert_moscow_timezone():
    """ Make sure our timezone is Moscow Time (UTC+3)

        :raises AssertionError: if the local UTC offset differs from +3 hours by a minute or more.
    """
    moscow_utc_offset_seconds = 10800

    # Feeding both broken-down times to timegm() gives the (UTC - local) wall clock difference in seconds
    utc_minus_local = calendar.timegm(time.gmtime()) - calendar.timegm(time.localtime())

    # Since gmtime() and localtime() are two different calls, we need to ignore a few seconds of difference
    if abs(utc_minus_local + moscow_utc_offset_seconds) >= 60:
        # Raised explicitly rather than via `assert`, so the check is not stripped under `python -O`
        raise AssertionError(
            "Local timezone is not Moscow Time: UTC offset is {} seconds".format(-utc_minus_local)
        )


class _HttpBufferingSender(object):
    """ Base class: buffers incoming reports and periodically pushes them to a backend via HTTP POST.

        Subclasses define the buffer layout by implementing `_reset_buffer`, `_rows_collected`,
        `_consume_data` and `_flush_data`.
    """

    # A flush is forced once the buffer holds this many rows, even if the send time has not come yet
    MAX_ROWS_TO_SEND = 250000
    # Multiplier for the pause between flushes, see _get_next_send_time()
    # (presumably the number of heartbeat servers running this plugin -- confirm)
    N_HEARTBEAT_SERVERS = 10
    # Number of retry rounds intended to be passed to _execute_query()
    N_HTTP_RETRIES = 5
    # Pause between two consecutive retry rounds
    RETRY_PAUSE_SECONDS = 0.2
    # Upper bound for a single HTTP request; without it a hung host would block the read loop forever.
    # Subclasses may override the value if their backend legitimately needs more time.
    HTTP_TIMEOUT_SECONDS = 60.0

    def __init__(self, http_hosts, generate_dump_http_query, avg_seconds_between_queries=2.0, extra_data=None, headers=None):
        """ :param http_hosts: hosts to POST to; each one is substituted into ``http://<host>/...``
            :param generate_dump_http_query: callable stored for use by `_flush_data` implementations
            :param avg_seconds_between_queries: base pause between flushes, see `_get_next_send_time`
            :param extra_data: optional mapping with `project`, `cluster` and `service` keys;
                when given, requests go to the ``push?project=...`` path instead of the root one
            :param headers: optional HTTP headers sent with every request
        """
        self.http_hosts = http_hosts
        self.avg_seconds_between_queries = avg_seconds_between_queries
        self.generate_dump_http_query = generate_dump_http_query
        self.extra_data = extra_data
        self.headers = headers if headers else {}

        self._next_send_time = self._get_next_send_time()
        self._communicator = helper.Communicator()
        self._buffer = None

    def _reset_buffer(self):
        """ Replace `self._buffer` with an empty one. Must be implemented by subclasses. """
        raise NotImplementedError()

    @property
    def _rows_collected(self):
        """ Number of buffered rows, compared against MAX_ROWS_TO_SEND. Must be implemented by subclasses. """
        raise NotImplementedError()

    def _consume_data(self, host, data):
        """ Put one received message into the buffer. Must be implemented by subclasses. """
        raise NotImplementedError()

    def _flush_data(self):
        """ Send the buffered data to the backend. Must be implemented by subclasses. """
        raise NotImplementedError()

    def _get_next_send_time(self):
        """ Clickhouse recommends making no more than 1 INSERT/sec
            Since we have multiple servers, we make sure to wait a random amount of time
            to space out requests more or less evenly.

            Solomon by default allows one insert per 15 seconds

            :return: unix time (float) of the next planned flush
        """
        jitter = random.uniform(0.8, 1.2)
        return time.time() + self.avg_seconds_between_queries * jitter * self.N_HEARTBEAT_SERVERS

    def _execute_query(self, query, n_http_retries):
        """ POST `query` to the first host that accepts it.

            Every round tries all hosts in random order and stops at the first successful response.

            :param query: request body
            :param n_http_retries: number of rounds over all hosts before giving up
            :raises RuntimeError: if no host accepted the query in any of the rounds
        """
        # Optional push path carrying project/cluster/service as query parameters
        url_postfix = ''
        if self.extra_data:
            url_postfix = 'push?project={project}&cluster={cluster}&service={service}'.format(**self.extra_data)

        for retry_number in range(n_http_retries):
            for host in random.sample(self.http_hosts, len(self.http_hosts)):
                try:
                    response = requests.post(
                        "http://{}/{}".format(host, url_postfix),
                        data=query,
                        headers=self.headers,
                        timeout=self.HTTP_TIMEOUT_SECONDS,
                    )
                    response.raise_for_status()
                except requests.RequestException:
                    length = min(len(query), 1000)
                    # Lazy %-formatting: a formatting problem must not escape from the except block
                    logging.exception(
                        "Failed to make HTTP request to %s, first %i bytes of query: %s",
                        host, length, query[:length],
                    )
                else:
                    logging.debug("inserted to %s, query length = %i", host, len(query))
                    return

            # No reason to wait after the last round: we are about to give up anyway
            if retry_number + 1 < n_http_retries:
                time.sleep(self.RETRY_PAUSE_SECONDS)

        raise RuntimeError(
            "No host accepted the query after {} rounds over {} hosts".format(n_http_retries, len(self.http_hosts))
        )

    def run(self):
        """ Main loop: read messages from the communicator, buffer them and flush from time to time.

            A flush happens when the planned send time has passed or the buffer holds
            MAX_ROWS_TO_SEND rows. If consuming or flushing fails, the exception is logged and the
            loop goes on; the buffer and the send time are left as is in that case, so the flush
            is attempted again on the next message.

            :raises AssertionError: if the timezone check fails at startup
        """
        logging.info("Starting sre instanceusage plugin")
        assert_moscow_timezone()

        self._reset_buffer()
        # NOTE(review): presumably tells the communicator we can accept the next message -- confirm
        self._communicator.ready()

        for host, _, data in self._communicator.read():
            self._communicator.ready()
            try:
                self._consume_data(host, data)
                if (
                    time.time() > self._next_send_time
                    or self._rows_collected >= self.MAX_ROWS_TO_SEND
                ):
                    logging.info(
                        "current time = %s; next send time = %s; n reports = %i",
                        time.time(), self._next_send_time, self._rows_collected,
                    )
                    assert_moscow_timezone()
                    self._flush_data()
                    self._reset_buffer()
                    self._next_send_time = self._get_next_send_time()
                    logging.info('new next send time = %s', self._next_send_time)
            except Exception:
                # Top-level boundary of the plugin: one bad message must not kill the loop
                logging.exception("WTF! FML! Unexpected exception")


class InstanceUsageBufferingSender(_HttpBufferingSender):
    """ Sender that keeps a separate list of (host, report) pairs for every instance type
    """

    def _reset_buffer(self):
        """ Start over with an empty list for each type from InstanceTypes.ALL
        """
        self._buffer = dict((kind, []) for kind in InstanceTypes.ALL)

    @property
    def _rows_collected(self):
        """ Length of the longest per-type list
        """
        return max(map(len, self._buffer.values()))

    def _consume_data(self, host, data):
        """ Sort the reports of one message into the per-type lists.

            Reports that are None, have no `ts` field or have an unknown type are skipped.
            `ts` is converted to int in place and a `formatted_date` field is added.
        """
        for report in data['report']:
            if report is None:
                continue

            if 'ts' not in report:
                logging.warning("WTF! report without ts field")
                continue

            # Reports without an explicit type are treated as ISS ones
            kind = report.get('type', InstanceTypes.ISS)
            if kind not in InstanceTypes.ALL:
                logging.error("Unknown report type %s, ignoring report", kind)
                continue

            ts = int(report['ts'])
            report['ts'] = ts
            report['formatted_date'] = get_ch_formatted_date_from_timestamp(ts)
            self._buffer[kind].append((host, report))

            # remove when host type will be supported
            if kind == InstanceTypes.HOST:
                legacy_report = _get_legacy_host_report(report)
                self._buffer[InstanceTypes.ISS].append((host, legacy_report))

    def _flush_data(self):
        """ Send every query generated for every report type.

            A failed query does not stop the remaining ones from being sent.

            :raises RuntimeError: after all queries were attempted, if at least one of them failed
        """
        any_query_failed = False

        for kind, pairs in self._buffer.items():
            for query in self.generate_dump_http_query(kind, pairs):
                try:
                    self._execute_query(query, self.N_HTTP_RETRIES)
                except RuntimeError:
                    any_query_failed = True
                    logging.critical(
                        "Giving up after %i retries to %i replicas for report type %s",
                        self.N_HTTP_RETRIES, len(self.http_hosts), kind
                    )
                else:
                    logging.debug('inserted type %s', kind)

        if not any_query_failed:
            return
        raise RuntimeError("Giving up on sending data to backend")


def _get_legacy_host_report(instance_report):
    """ Build a copy of the report with port/group/tags/version overwritten by fixed placeholder values.

        The passed report itself is not modified (a shallow copy is made).
    """
    placeholders = (
        ('port', 65535),
        ('group', ''),
        ('major_tag', 0),
        ('minor_tag', 0),
        ('version', 0),
    )

    legacy = instance_report.copy()
    for field, value in placeholders:
        legacy[field] = value
    return legacy


class InfreqBufferingSender(_HttpBufferingSender):
    """ Sender that keeps all (host, report) pairs in one flat list and sends them with a single query
    """

    def _reset_buffer(self):
        """ Start over with an empty list
        """
        self._buffer = list()

    @property
    def _rows_collected(self):
        """ Number of buffered (host, report) pairs
        """
        return len(self._buffer)

    def _consume_data(self, host, data):
        """ Append the reports of one message to the buffer.

            Reports that are None or have no `ts` field are skipped. `ts` is converted to int
            in place, `formatted_date` is added and `volumes_size` is replaced by its JSON dump.
        """
        for report in data['report']:
            if report is None:
                continue

            if 'ts' not in report:
                logging.warning("WTF! report without ts field")
                continue

            ts = int(report['ts'])
            report['ts'] = ts
            report['formatted_date'] = get_ch_formatted_date_from_timestamp(ts)

            volumes_json = json.dumps(report.get('volumes_size', dict()))
            # RX-303: single quotes in the dump are backslash-escaped
            report['volumes_size'] = volumes_json.replace("'", "\\'")

            self._buffer.append((host, report))

    def _flush_data(self):
        """ Generate one query for the whole buffer and send it.

            Nothing is sent if the query generator returns None.
        """
        query = self.generate_dump_http_query(self._buffer)
        if query is not None:
            self._execute_query(query, self.N_HTTP_RETRIES)
            return

        logging.error("Http query is None")
