#!/usr/bin/env python
# -*- coding: utf8 -*-

import logging
import time
import requests
import json
import urllib
from time import sleep

from sandbox.sandboxsdk import environments
from sandbox import sdk2
import sandbox.common.types.client as ctc
from sandbox.common.errors import TaskError

# Solomon push endpoint; the target project/cluster/service are encoded into
# the query string once, at import time.
SOLOMON_URL = "https://solomon.yandex.net/api/v2/push?" + \
    urllib.urlencode(
        {
            "project": "yabs",
            "cluster": "yabs",
            "service": "push_to_logbroker",
        }
    )
# Row limit of the per-host audit query; Worker.process_time_range also uses it
# to decide whether a time range needs another fetch pass.
MAX_SEND_METRICS = 50000
# When True, rows that carry a 'HostID' key are left out of the pushed sensors,
# so only the rows without one (the per-LogType totals) are sent.
SEND_ONLY_TOTAL = False
# Log types whose audit HostID is first translated through the
# LogIDSuffix -> HostID map before the hostname is looked up.
LOGTYPES_WITH_HITLOGID = ["performance", "performance_debug", "dsp", "dsp-checked", "undodsp", "ssp"]
# YT path of the audit table that is read and updated with SentCount.
AUDIT_TABLE = "//home/yabs/stat/LogBrokerAudit"


"""
Копирует данные из yt.LogBrokerAudit в Solomon,
чтобы там можно было сравнивать с графиками отправки
"""


class SolomonPusher(object):
    """Sends audit rows to the Solomon push API as sensors.

    Rows are uploaded in chunks of at most ``BATCH_SIZE`` rows. When a chunk
    fails, the upload is retried up to ``retries_qty`` times in total and
    resumes from the chunk that failed instead of starting over.
    """

    # Upper bound on the number of rows put into one push request.
    BATCH_SIZE = 10000

    def __init__(self, solomon_token, cluster_name, retries_qty, retry_interval, force=False,
                 request_timeout=60):
        """
        :param solomon_token: OAuth token put into the Authorization header.
        :param cluster_name: value of the ``yt_cluster`` common label.
        :param retries_qty: total number of upload attempts made by ``push``.
        :param retry_interval: seconds to sleep between attempts.
        :param force: when true, ``&ack=full`` is appended to the push URL.
        :param request_timeout: seconds after which a single HTTP request is
            abandoned, so that a stalled connection is retried rather than
            blocking the task forever.
        """
        self.solomon_token = solomon_token
        self.cluster_name = cluster_name
        self.retries_qty = retries_qty
        self.retry_interval = retry_interval
        self.force = force
        self.request_timeout = request_timeout

    def __push__(self, rows):
        """POST one chunk of rows to Solomon.

        Each row must provide ``LogType``, ``Hostname``, ``UnixTime`` and
        ``Count``. Raises whatever ``requests`` raises on transport errors and
        ``requests.HTTPError`` on a non-2xx response.
        """
        sensors = [
            {
                "labels": {"logtype": row["LogType"], "host": row["Hostname"]},
                "ts": int(row["UnixTime"]),
                "value": int(row["Count"]),
            }
            for row in rows
            # With SEND_ONLY_TOTAL enabled, per-host rows (those carrying a
            # HostID) are dropped and only the totals are sent.
            if not ('HostID' in row and SEND_ONLY_TOTAL)
        ]
        if not sensors:
            # Everything was filtered out; there is nothing to post.
            return

        headers = {
            'Content-type': 'application/json',
            'Authorization': 'OAuth %s' % self.solomon_token,
        }
        data = {
            "commonLabels": {
                "type": 'output',
                "yt_cluster": self.cluster_name,
            },
            "sensors": sensors,
        }
        # Serialize once and reuse the string for both the log and the request.
        payload = json.dumps(data)
        logging.info(payload)
        parameters = "&ack=full" if self.force else ""
        response = requests.post(
            SOLOMON_URL + parameters,
            data=payload,
            headers=headers,
            timeout=self.request_timeout
        )
        response.raise_for_status()

    def push(self, rows):
        """Upload all rows, retrying on failure.

        :param rows: sized iterable of row dicts (see ``__push__``).
        :raises TaskError: when the rows could not be delivered within
            ``retries_qty`` attempts.
        """
        rows = list(rows)
        chunks = [
            rows[start:start + self.BATCH_SIZE]
            for start in range(0, len(rows), self.BATCH_SIZE)
        ]
        # Index of the first chunk that has not been delivered yet; it survives
        # across attempts so that delivered chunks are never sent twice.
        next_chunk = 0

        for attempt in range(self.retries_qty):
            try:
                while next_chunk < len(chunks):
                    self.__push__(chunks[next_chunk])
                    next_chunk += 1
            except Exception as e:
                # Retry boundary: any transport or HTTP error is logged and retried.
                logging.warning("Couldn't send %d rows to Solomon: %s" % (len(rows), e))
                if attempt + 1 < self.retries_qty:
                    # No point in waiting when no further attempt will follow.
                    sleep(self.retry_interval)
            else:
                logging.info("Send %d metriks" % (len(rows)))
                return

        raise TaskError("Coundn't push data to Solomon in %d tries" % (self.retries_qty))


class Worker(object):
    """Exports not-yet-sent audit rows from one YT cluster to Solomon.

    For every time range the worker selects audit rows whose ``Count`` exceeds
    ``SentCount``, pushes them (together with per-LogType totals) to Solomon
    and then writes ``SentCount = Count`` back, so the same rows are not
    selected again.
    """

    def __init__(self, cluster_name, table_name, yt_token, solomon_token,
                 observation_interval, retries_qty, retry_interval,
                 force_pushing):
        """
        :param cluster_name: YT proxy to connect to; also passed to the pusher.
        :param table_name: YT path of the audit table to read and update.
        :param yt_token: token for the YT client.
        :param solomon_token: token for the Solomon pusher.
        :param observation_interval: how far back from now ``work`` looks, in seconds.
        :param retries_qty: passed to ``SolomonPusher``.
        :param retry_interval: passed to ``SolomonPusher``.
        :param force_pushing: passed to ``SolomonPusher`` as ``force``.
        """
        self.table_name = table_name
        self.observation_interval = observation_interval

        # Imported lazily; presumably yt.wrapper only becomes importable once
        # the task's pip environment has been prepared -- TODO confirm.
        from yt.wrapper import YtClient
        cfg = {
            "default_value_of_raw_option": False,
            "token": yt_token,
            "proxy": {"url": cluster_name},
        }
        self.yt_client = YtClient(config=cfg)
        self.pusher = SolomonPusher(solomon_token, cluster_name, retries_qty,
                                    retry_interval, force_pushing)

    def _select_unsent_bounds(self, time_range):
        """Return (min, max) UnixTime of unsent rows in time_range, or (0, 0) if none."""
        gen = self.yt_client.select_rows("""
                min(UnixTime),
                max(UnixTime)
            from
                [%s]
            where
                    UnixTime >= %d
                and
                    UnixTime < %d
                and
                    Count > SentCount
            group by
                1
        """ % (self.table_name, time_range[0], time_range[1]))

        min_unixtime = max_unixtime = 0
        for row in gen:
            min_unixtime = row["min(UnixTime)"]
            max_unixtime = row["max(UnixTime)"]
        return min_unixtime, max_unixtime

    def _load_host_maps(self):
        """Return (HostID -> Hostname, LogIDSuffix -> HostID) built from the hosts table."""
        gen = self.yt_client.select_rows("""
                HostID,
                Hostname
            from
                [//home/yabs/frontend/hosts]
        """)
        hosts = {row["HostID"]: row["Hostname"] for row in gen}

        gen = self.yt_client.select_rows("""
                LogIDSuffix,
                HostID
            from
                [//home/yabs/frontend/hosts]
            where Mode in ("metapartner", "metasearch", "metarank")
        """)
        engine_hosts = {row["LogIDSuffix"]: row["HostID"] for row in gen}
        return hosts, engine_hosts

    def _select_totals(self, min_unixtime, max_unixtime):
        """Return per-(UnixTime, LogType) totals, labelled with Hostname "all"."""
        # The sums are computed here because Solomon has trouble doing it itself.
        gen = self.yt_client.select_rows("""
                UnixTime,
                LogType,
                sum(Count)
            from
                [%s]
            where
                    UnixTime >= %d
                and
                    UnixTime <= %d
            group by
                UnixTime,
                LogType
        """ % (self.table_name, min_unixtime, max_unixtime))
        totals = []
        for row in gen:
            row["Count"] = row["sum(Count)"]
            row["Hostname"] = "all"
            totals.append(row)
        return totals

    def _select_host_rows(self, min_unixtime, max_unixtime, hosts, engine_hosts):
        """Return (rows, limit_reached) for unsent per-host rows.

        ``rows`` holds the rows whose hostname could be resolved;
        ``limit_reached`` tells whether the query returned as many rows as its
        ``limit`` allows, i.e. whether more unsent rows may remain.
        """
        gen = self.yt_client.select_rows("""
                UnixTime,
                LogType,
                HostID,
                Count
            from
                [%s]
            where
                    UnixTime >= %d
                and
                    UnixTime <= %d
                and
                    Count > SentCount
            limit %d
        """ % (self.table_name, min_unixtime, max_unixtime, MAX_SEND_METRICS))

        resolved = []
        fetched = 0
        for row in gen:
            fetched += 1
            if row["HostID"] == 0:
                row["Hostname"] = "unknown"
            else:
                try:
                    if row["LogType"] in LOGTYPES_WITH_HITLOGID:
                        # For these log types HostID holds a LogIDSuffix that
                        # has to be mapped to the real HostID first.
                        row["Hostname"] = hosts[engine_hosts[row["HostID"]]]
                    else:
                        row["Hostname"] = hosts[row["HostID"]]
                except KeyError as err:
                    # %s rather than %d: HostID may not be an int, and a
                    # formatting error here would abort the whole task.
                    logging.error("Can't decode HostID %s with LogType %s: %s" % (row["HostID"], row["LogType"], err))
                    continue

            resolved.append(row)
        return resolved, fetched >= MAX_SEND_METRICS

    def _fetch(self, time_range):
        """Return (rows, limit_reached) for time_range; rows = totals + per-host rows."""
        min_unixtime, max_unixtime = self._select_unsent_bounds(time_range)

        rows = []
        limit_reached = False
        if max_unixtime >= min_unixtime and max_unixtime != 0:
            hosts, engine_hosts = self._load_host_maps()
            rows.extend(self._select_totals(min_unixtime, max_unixtime))
            host_rows, limit_reached = self._select_host_rows(
                min_unixtime, max_unixtime, hosts, engine_hosts)
            rows.extend(host_rows)

        logging.info("Fetched %d rows" % len(rows))
        return rows, limit_reached

    def fetch_data(self, time_range):
        """Return the rows to push for ``time_range`` (a ``(start, end)`` pair).

        The result is empty when the range holds no row with
        ``Count > SentCount``. Otherwise it contains the per-LogType totals
        (no ``HostID`` key, ``Hostname`` "all") followed by at most
        ``MAX_SEND_METRICS`` unsent per-host rows with ``Hostname`` filled in.
        """
        return self._fetch(time_range)[0]

    def process_time_range(self, time_range):
        """Push all unsent rows of ``time_range`` and mark them as sent.

        The range is fetched repeatedly while the per-host query keeps hitting
        its row limit and rows keep being marked as sent.
        """
        logging.info("Processing time range %d - %d" % (time_range[0], time_range[1]))
        while True:
            rows, limit_reached = self._fetch(time_range)
            # Only per-host rows exist in the audit table; totals have no HostID.
            sent_marks = [
                {
                    "UnixTime": x["UnixTime"],
                    "LogType": x["LogType"],
                    "HostID": x["HostID"],
                    "SentCount": x["Count"],
                } for x in rows if 'HostID' in x
            ]

            if rows:
                self.pusher.push(rows)
                if sent_marks:
                    self.yt_client.insert_rows(self.table_name, sent_marks, update=True)
            else:
                logging.info("No rows selected")

            if not limit_reached:
                break
            if not sent_marks:
                # The limit was hit, yet nothing could be marked as sent
                # (every fetched row was undecodable): another pass would
                # return the same rows, so stop instead of looping forever.
                logging.warning("No rows could be marked as sent, stop processing the time range")
                break

    def work(self, batch_size):
        """Process the last ``observation_interval`` seconds in ``batch_size``-second steps."""
        now = int(time.time())
        for t in range(now - self.observation_interval, now, batch_size):
            self.process_time_range((t, t + batch_size))


class YabsLogbrokerAudit(sdk2.Task):
    """Task to send diagnostic metrics to Solomon

    For every selected YT cluster a ``Worker`` copies the unsent rows of the
    audit table to Solomon.
    """

    class Parameters(sdk2.Task.Parameters):
        # YT clusters to process; each one gets its own Worker in on_execute.
        with sdk2.parameters.CheckGroup('Names of yt clusters', required=True) as clusters:
            clusters.choices = (
                ('ZENO', 'zeno'),
                ('SENECA-SAS', 'seneca-sas'),
                ('SENECA-VLA', 'seneca-vla'),
                ('SENECA-MAN', 'seneca-man'),
                ('HAHN', 'hahn'),
                ('ARNOLD', 'arnold'),
            )

        # How far back from "now" the audit table is scanned.
        interval = sdk2.parameters.Integer(
            "Data observation time (seconds)",
            default=3600
        )

        # Width, in seconds, of each time range the observation window is split into.
        batch_size = sdk2.parameters.Integer(
            "Query time interval",
            default=3600
        )

        # Passed to SolomonPusher as ``force``.
        force_pushing = sdk2.parameters.Bool(
            "Force push to all Solomon's clusters",
            default=False
        )

        retries_qty = sdk2.parameters.Integer(
            "Solomon pushing retries qty",
            default=3
        )

        retry_interval = sdk2.parameters.Integer(
            "Solomon retry interval (seconds)",
            default=10
        )

        # The secret is expected to contain the key "YT_TOKEN".
        yt_token_secret = sdk2.parameters.YavSecret(
            "Yav secret with yt token",
            default="sec-01d4mdr98tm9n1k6a3rmyn685t"
        )

        # The secret is expected to contain the key "solomon_token".
        solomon_token_secret = sdk2.parameters.YavSecret(
            "Yav secret with solomon token",
            default="sec-01eca9c44wtrkdjsfygbz7pdtr"
        )

    class Requirements(sdk2.Task.Requirements):
        # Packages providing the yt client that Worker imports at run time.
        environments = (
            environments.PipEnvironment("yandex-yt"),
            environments.PipEnvironment('yandex-yt-yson-bindings-skynet', use_wheel=True),
        )
        client_tags = ctc.Tag.LINUX_PRECISE
        cores = 1

        class Caches(sdk2.Requirements.Caches):
            pass

    def on_execute(self):
        """Run one Worker per selected cluster, one after another."""
        params = self.Parameters
        # The tokens do not depend on the cluster, so read the secrets once
        # instead of once per loop iteration.
        yt_token = params.yt_token_secret.data()["YT_TOKEN"]
        solomon_token = params.solomon_token_secret.data()["solomon_token"]

        for cluster_name in params.clusters:
            Worker(
                cluster_name,
                AUDIT_TABLE,
                yt_token,
                solomon_token,
                params.interval,
                params.retries_qty,
                params.retry_interval,
                params.force_pushing
            ).work(params.batch_size)
