# -*- coding: utf-8 -*-

import logging
import datetime
import requests
import time
import re
import socket

from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from collections import Counter
from sandbox import sdk2


class MarketBotNotifier(sdk2.Task):
    """The task aggregates top ip addresses according to query count and notifies if
    there are too many queries.
    """

    # Input parameters of the task; they are read and validated in
    # _parse_parameters().
    class Parameters(sdk2.Task.Parameters):
        # Base URL of the ClickHouse HTTP endpoint every query is POSTed to.
        url = sdk2.parameters.Url(
            "Clickhouse host url",
            default="https://clickhouse-public.market.yandex.net",
            required=True
        )

        # Length of the aggregation window in minutes, counted back from the
        # ClickHouse host's current time.
        minutes_history = sdk2.parameters.Integer(
            "Time range to aggregate (in minutes)",
            default=30,
            required=True
        )

        # An ip address is reported once its request count reaches this value.
        notification_threshold = sdk2.parameters.Integer(
            "Minimal query count per ip address to become a 'critical' one",
            default=4000,
            required=True
        )

    class QueryParams(object):
        """Read-only bundle of the values substituted into the query template.

        The date and both datetime bounds are stored as their ``str()`` form;
        the threshold is stored as given.
        """

        def __init__(self, date_to_select, datetime_from, datetime_to, notification_threshold):
            self._notification_threshold = notification_threshold
            # Stringify once so the values can go straight into the SQL text.
            self._date_to_select, self._datetime_from, self._datetime_to = [
                str(value) for value in (date_to_select, datetime_from, datetime_to)
            ]

        # Getter-only properties: assigning to any of them raises AttributeError.
        date_to_select = property(lambda self: self._date_to_select)
        datetime_from = property(lambda self: self._datetime_from)
        datetime_to = property(lambda self: self._datetime_to)
        notification_threshold = property(lambda self: self._notification_threshold)

    class CriticalQuery(object):
        def __init__(self, ip, count):
            self._count = count
            self._ip = ip
            self._hostname = MarketBotNotifier._resolve_host_name(ip)

        @property
        def count(self):
            return self._count

        @property
        def ip(self):
            return self._ip

        @property
        def hostname(self):
            return self._hostname

        def __str__(self):
            return "{{\"ip\": \"{}\", \"count\": {}, \"hostname\": \"{}\"}}" \
                .format(self._ip, self._count, self._hostname)

        def __repr__(self):
            return str(self)

    @staticmethod
    def _resolve_host_name(ip):
        """Resolve hostname by ip address ."""

        assert isinstance(ip, str), "ip address should be a string but it is {}".format(ip)

        if not ip:
            return "unknown"
        try:
            name, _, _ = socket.gethostbyaddr(ip)
        except:
            return "unknown"
        return name

    @staticmethod
    def _catch_clickhouse_exception(clickhouse_response):
        assert clickhouse_response.find("DB::Exception") < 0, \
            "Clickhouse exception occurred: {}".format(clickhouse_response)

    @staticmethod
    def _do_clickhouse_query(query,
                             url,
                             timeout=1500,
                             max_retry_count=3,
                             sleep_delay=1):
        """Query a clickhouse host and return a response.
        Return None if retries count exceed.

        Keyword arguments:
        query -- SQL query in clickhouse syntax
        url -- url of a clickhouse host
        timeout -- timeout (in milliseconds) per request (default 1500)
        max_retry_count -- count of retries (default 3)
        sleep_delay -- time (in seconds) to sleep between retries (default 2)
        """

        assert isinstance(query, str), "query should be a string but it is {}".format(type(query))
        assert isinstance(url, str), "url should be a string but it is {}".format(type(url))
        assert isinstance(timeout, int) and timeout > 0, \
            "timeout should be a positive integer but {} of type it is {}".format(timeout, type(timeout))
        assert isinstance(max_retry_count, int) and max_retry_count > 0, \
            "max_retry_count should be a positive integer but it is {} of {}" \
            .format(max_retry_count, type(max_retry_count))
        assert isinstance(sleep_delay, int) and sleep_delay >= 0, \
            "sleep_delay should be a non-negative integer but it is {} of {}" \
            .format(sleep_delay, type(sleep_delay))

        tries_left = max_retry_count
        exception = None
        while tries_left > 0:
            try:
                response = requests.post(url,
                                         params={"query": query},
                                         timeout=timeout)
                return str(response.text)
            except requests.exceptions.RequestException as exception:
                tries_left -= 1
                if tries_left > 0:
                    time.sleep(sleep_delay)
        assert False, "cannot connect to {}, retry limit exceeded\nException is: {}".format(url, exception)

    def _get_host_datetime(self):
        """Ask the ClickHouse host at self.url for its current date and time.

        Returns the result of ``SELECT now()`` parsed by dateutil's ``parse``.
        Raises AssertionError if the host reports a ClickHouse exception.
        """
        raw_now = MarketBotNotifier._do_clickhouse_query("SELECT now()", self.url)
        # Reject an error payload before trying to parse it as a timestamp.
        MarketBotNotifier._catch_clickhouse_exception(raw_now)
        return parse(raw_now)

    def _partition_by_query_params(self, current_datetime):
        """Partition time range from current_datetime - self.minutes_history
        to current_datetime by days and return list of proper QueryParams.
        """

        assert isinstance(current_datetime, datetime.datetime), \
            "current_datetime should be an instance of datetime.datetime but it is {} of {}" \
            .format(current_datetime, type(current_datetime))

        range_begin = current_datetime + relativedelta(minutes=-self.minutes_history)
        days_in_range = (current_datetime - range_begin).days
        for _ in range(max(days_in_range, 1)):
            next_day = range_begin \
                       + relativedelta(days=+1, hour=0, minute=0, second=0)
            datetime_to = min(next_day, current_datetime)
            params = MarketBotNotifier.QueryParams(range_begin.date(),
                                                   range_begin,
                                                   datetime_to,
                                                   self.notification_threshold)
            range_begin = next_day
            yield params

    def _fill_query(self, query_params):
        """Create a string of a query using given parameters."""

        assert isinstance(query_params, MarketBotNotifier.QueryParams), \
            "query_params should be an instance of QueryParams but it is {}".format(type(query_params))

        return self.query_template.format(**{
            "date_to_select": query_params.date_to_select,
            "datetime_from": query_params.datetime_from,
            "datetime_to": query_params.datetime_to,
            "notification_threshold": query_params.notification_threshold
        })

    # TODO: It's better to check the format of the response in some strict way.
    @staticmethod
    def _parse_response(response):
        """Parse CH's response which should be presented in the following format:
        <ip_address_1>\t<request_count_1>
        ...
        <ip_address_n>\t<request_count_n>
        """

        splitted_response = re.sub(R"\s+", " ", response).strip().split()
        ips = splitted_response[0::2]
        counts = splitted_response[1::2]
        assert len(ips) == len(counts), \
            ("Count of ip addresses and count of their occurrences are different with the response:\n"
             "{}").format(response)

        return Counter({ip: int(count) for ip, count in zip(ips, counts)})

    @staticmethod
    def _render_output(critical_queries):
        return "\n".join(str(q) for q in sorted(critical_queries, key=lambda query: query.count, reverse=True))

    # TODO: current solution seeks only for top-500 ip addresses. Generalize maybe?
    def _count_top_ip_addresses(self):
        """Query every partition of the time window and sum the counts per ip.

        Returns a Counter mapping ip address -> total request count.
        """

        totals = Counter()
        now_on_host = self._get_host_datetime()

        for partition in self._partition_by_query_params(now_on_host):
            raw_response = self._do_clickhouse_query(self._fill_query(partition), self.url)
            # Stop on a ClickHouse error payload before parsing it as data.
            MarketBotNotifier._catch_clickhouse_exception(raw_response)
            totals += self._parse_response(raw_response)
        return totals

    def _get_critical_queries(self, top_ip_addresses):
        """Filter top ip addresses and construct critical queries."""

        assert isinstance(top_ip_addresses, Counter), \
            "top_ip_addresses should be a collections.Counter but it is of {}".format(type(top_ip_addresses))

        return [MarketBotNotifier.CriticalQuery(ip, count)
                for ip, count in top_ip_addresses.items() if count >= self.notification_threshold]

    def _notify(self, critical_queries):
        pass

    def _parse_parameters(self):
        self.url = str(self.Parameters.url)
        self.minutes_history = int(self.Parameters.minutes_history)
        self.notification_threshold = int(self.Parameters.notification_threshold)

        assert self.minutes_history > 0, \
            "minutes_history should be a positive integer but it is {}" \
            .format(self.minutes_history)
        assert self.notification_threshold, \
            "notification_threshold should be a positive integer but it is {}" \
            .format(self.notification_threshold)

        logging.info("host: {}".format(self.url))
        logging.info("trying to acquire {} minute(s) of history".format(self.minutes_history))
        logging.info("critical threshold is {}".format(self.notification_threshold))

    def on_execute(self):
        self.query_template = ("SELECT\n"
                               "    client_ip\n"
                               "    , count(*) as request_count\n"
                               "FROM market.nginx2\n"
                               "WHERE\n"
                               "    date = '{date_to_select}'\n"
                               "    AND CAST(timestamp as DateTime)\n"
                               "        BETWEEN '{datetime_from}' AND '{datetime_to}'\n"
                               "    AND environment = 'PRODUCTION'\n"
                               "    AND client_ip NOT LIKE '%:%' -- exclude IPv6\n"
                               "GROUP BY client_ip\n"
                               "HAVING request_count >= {notification_threshold}\n"
                               "ORDER BY request_count DESC\n"
                               "LIMIT 500\n")
        try:
            self._parse_parameters()

            top = self._count_top_ip_addresses()
            critical_queries = self._get_critical_queries(top)
            rendered_queries = self._render_output(critical_queries)
            self._notify(critical_queries)

            data_to_log = "no critical queries" if len(critical_queries) == 0 else rendered_queries
            logging.info(data_to_log)
        except AssertionError as error:
            logging.error(error)
            raise RuntimeError("A runtime error occurred. Check the common.log to find a reason.")
