import logging
import requests
import urllib

from sandbox import sdk2
from sandbox.common.errors import TaskFailure
from sandbox.projects.yabs.base_bin_task import BaseBinTask


# YT proxies that are queried one by one when reading the current major owners.
ADFOX_MAJOR_OWNERS_REPLICA_CLUSTERS = ["freud", "hahn", "arnold"]
# YT path of the table with already known major owners.
ADFOX_MAJOR_OWNERS = "//home/yabs/dict/AdfoxMajorOwners"
# Keys under which the tokens are looked up inside the corresponding Yav secrets.
YT_TOKEN_KEY = "yt_token"
SOLOMON_TOKEN_KEY = "solomon_token"
# Per-owner row count over the last `days` days (today excluded), biggest owner first.
# Placeholders `table` and `days` are filled in with str.format before execution.
OWNER_STATISTICS_QUERY = """
    SELECT owner_id, count()
    FROM {table}
    WHERE today() - {days} <= date AND date < today()
    GROUP BY owner_id
    ORDER BY count() DESC
"""
# Database used by the per-shard ClickHouse clients that run the statistics query.
ADFOX_DATABASE = "adfox"
# Database used by the ClickHouse master client when resolving cluster shards.
SYSTEM_DATABASE = "system"


def get_secure_by_port(port):
    """
    Derive the `secure` flag for a ClickHouse client from the port number.

    Port 9440 maps to True and port 9000 maps to False. Any other port is
    reported through logging.error and is treated like 9000 (returns False).
    """
    if port == 9440:
        return True
    # Only 9000 is an expected non-secure port; everything else is suspicious.
    if port != 9000:
        logging.error("Unknown port %d", port)
    return False


class YabsAdfoxFillMajorOwners(BaseBinTask):
    """
    Get major owners in adfox

    Workflow (see on_execute):
      1. Run OWNER_STATISTICS_QUERY on one host of every ClickHouse shard.
      2. Treat shards whose total row count is at least `outlier_threshold`
         times the median as outliers and take the biggest owner of each.
      3. Read already known major owners from YT.
      4. Push a Solomon metric describing owners that are not known yet.
    """

    class Requirements(sdk2.Requirements):
        cores = 1
        ram = 4096
        disk_space = 4096

        class Caches(sdk2.Requirements.Caches):
            pass

    class Parameters(BaseBinTask.Parameters):
        with BaseBinTask.Parameters.version_and_task_resource() as version_and_task_resource:
            resource_attrs = sdk2.parameters.Dict(
                "Filter resource by", default={"name": "YabsAdfoxFillMajorOwners"}
            )

        with sdk2.parameters.Group("ClickHouse master"):
            master_host = sdk2.parameters.String("ClickHouse master host", required=True)
            master_port = sdk2.parameters.Integer("ClickHouse master port", required=True, default=9000)
            master_user = sdk2.parameters.String("ClickHouse username for master host", required=True)
        with sdk2.parameters.Group("ClickHouse cluster"):
            cluster = sdk2.parameters.String("ClickHouse cluster", default="adfox_fast")
            cluster_user = sdk2.parameters.String("ClickHouse username for cluster host", required=True)
            cluster_password = sdk2.parameters.YavSecret("Yav secret with password for cluster user", required=True)
            cluster_table = sdk2.parameters.String("RTDStat table that will be used for statistics", default="RTDStatOwnerSharding")
        with sdk2.parameters.Group("YT"):
            yt_token = sdk2.parameters.YavSecret("Yav secret with yt token for dictionaries", required=True)
        with sdk2.parameters.Group("Solomon"):
            solomon_token = sdk2.parameters.YavSecret("Yav secret with solomon token", required=True)
        with sdk2.parameters.Group("Criteria parameters"):
            days = sdk2.parameters.Integer("Amount of days in a period", default=30)
            outlier_threshold = sdk2.parameters.Float("How much should shard differ from median in order to be outlier", default=1.3)

    def get_shards(self):
        """
        Resolve the shards of the configured ClickHouse cluster.

        Connects to the master host (no password is passed) and delegates to
        get_shards_for_cluster. The result is consumed by
        get_possible_major_owners as a mapping: shard -> iterable of
        (host, port) pairs.
        """
        from clickhouse_driver import Client as ClickHouseClient
        from yabs.stat.infra.clickhouse.lib import get_shards_for_cluster

        # NOTE(review): verify=False presumably switches off TLS certificate
        # verification for secure connections - confirm this is intentional.
        master_client = ClickHouseClient(
            host=self.Parameters.master_host,
            port=self.Parameters.master_port,
            user=self.Parameters.master_user,
            secure=get_secure_by_port(self.Parameters.master_port),
            database=SYSTEM_DATABASE,
            verify=False,
        )
        return get_shards_for_cluster(master_client, self.Parameters.cluster)

    def _query_shard(self, shard, hosts, query):
        """
        Run `query` on the first host of `shard` that answers.

        :param shard: shard identifier, used only for logging
        :param hosts: iterable of (host, port) pairs belonging to the shard
        :param query: SQL text to execute
        :return: rows returned by the first successful host
        :raises TaskFailure: when every host of the shard failed
        """
        from clickhouse_driver import Client as ClickHouseClient

        for host, port in hosts:
            # NOTE(review): verify=False - see the note in get_shards.
            client = ClickHouseClient(
                host=host,
                port=port,
                user=self.Parameters.cluster_user,
                password=self.cluster_password,
                secure=get_secure_by_port(port),
                database=ADFOX_DATABASE,
                verify=False,
            )

            try:
                return client.execute(query)
            except Exception as e:
                # A single broken replica is not fatal: fall through to the next host.
                logging.warning("Failed to get statistics from host %s port %s: %s", host, port, e)
            finally:
                # Do not keep one open connection per visited host.
                client.disconnect()

        logging.error("Failed to process shard %s", shard)
        raise TaskFailure("Failed to process some shards, see logs")

    @staticmethod
    def _select_major_owners(shard_stat, outlier_threshold):
        """
        Pick the biggest owner of every shard that is an outlier by row count.

        :param shard_stat: mapping shard -> rows of (owner_id, count), ordered
            by count descending (as produced by OWNER_STATISTICS_QUERY)
        :param outlier_threshold: minimal `shard rows / median rows` ratio
            that makes a shard an outlier
        :return: set of owner ids
        :raises TaskFailure: when there are no shards or the median is zero,
            i.e. the ratio cannot be computed
        """
        shard_row_count = dict(
            (shard, sum(row[1] for row in stat)) for shard, stat in shard_stat.items()
        )
        logging.info("Row count by each shard: %s", shard_row_count)
        if not shard_row_count:
            raise TaskFailure("No shard statistics collected, cannot compute median")

        # Upper median for an even number of shards.
        ordered_counts = sorted(shard_row_count.values())
        median = ordered_counts[len(ordered_counts) // 2]
        logging.info("Median: %d", median)
        if median <= 0:
            raise TaskFailure("Median row count is zero, cannot detect outlier shards")

        major_owners = set()
        for shard, shard_rows in shard_row_count.items():
            stat = shard_stat[shard]
            if not stat:
                # Nothing to pick from an empty shard.
                continue
            # float() is essential: under Python 2 `int / int` floors the ratio,
            # which would turn a fractional threshold such as 1.3 into 2.
            ratio = float(shard_rows) / median
            if ratio >= outlier_threshold:
                # Rows are sorted by count descending, so the first one is the biggest owner.
                owner_id = stat[0][0]
                logging.info("Shard %s is an outlier: %d / %d = %s >= %s; Adding its first owner %d to major owners",
                             shard, shard_rows, median, ratio, outlier_threshold, owner_id)
                major_owners.add(owner_id)
        return major_owners

    def get_possible_major_owners(self):
        """
        Collect per-owner statistics from every shard and detect major owners.

        Stores the cluster password on `self.cluster_password` as a side effect.

        :return: set of owner ids that are the biggest owners of outlier shards
        :raises TaskFailure: when some shard could not be queried on any host,
            or when outliers cannot be computed (no shards / zero median)
        """
        # The secret is keyed by the cluster user name.
        self.cluster_password = self.Parameters.cluster_password.data()[self.Parameters.cluster_user]

        shards = self.get_shards()

        query = OWNER_STATISTICS_QUERY.format(
            table=self.Parameters.cluster_table,
            days=self.Parameters.days,
        )
        logging.info("Executing query %s", query)

        shard_stat = {}
        for shard, hosts in shards.items():
            logging.info("Processing shard %s with hosts %s", shard, hosts)
            shard_stat[shard] = self._query_shard(shard, hosts, query)

        return self._select_major_owners(shard_stat, self.Parameters.outlier_threshold)

    def get_current_major_owners(self):
        """
        Read already registered major owners from YT.

        Stores the YT token on `self.yt_token` as a side effect.

        :return: union of `owner_id` values selected from ADFOX_MAJOR_OWNERS
            on every cluster listed in ADFOX_MAJOR_OWNERS_REPLICA_CLUSTERS
        """
        from yt.wrapper import YtClient

        self.yt_token = self.Parameters.yt_token.data()[YT_TOKEN_KEY]
        query = "owner_id from [{}]".format(ADFOX_MAJOR_OWNERS)

        major_owners = set()
        for replica_cluster in ADFOX_MAJOR_OWNERS_REPLICA_CLUSTERS:
            ytc = YtClient(proxy=replica_cluster, token=self.yt_token)
            major_owners.update(row["owner_id"] for row in ytc.select_rows(query))
        return major_owners

    def notify_about_major_owners(self, possible_major_owners, current_major_owners):
        """
        Log owners that are not registered yet and push a metric to Solomon.

        The pushed metric (label major_owners=pending) carries the id of one
        of the new owners - whichever the set iteration yields last - or -1
        when there are no new owners. The push is best effort: any failure is
        logged and does not fail the task.

        Stores the Solomon token on `self.solomon_token` as a side effect.

        :param possible_major_owners: set of owner ids detected as major
        :param current_major_owners: set of owner ids that are already known
        """
        major_owners = possible_major_owners - current_major_owners
        logging.info("Received %d major owners", len(major_owners))
        last_major_owner = -1
        if major_owners:
            logging.info("Possible new major owners:")
            for major_owner in major_owners:
                last_major_owner = major_owner
                logging.info(str(major_owner))

        self.solomon_token = self.Parameters.solomon_token.data()[SOLOMON_TOKEN_KEY]
        # NOTE(review): the OAuth token travels over plain http here;
        # switch to https if the endpoint supports it.
        solomon_url = 'http://solomon.yandex.net/api/v2/push'
        solomon_params = {
            'project': 'yabs',
            'cluster': 'adfox_fast',
            'service': 'fill_major_owners',
        }
        solomon_headers = {
            "Content-Type": "application/json",
            "Authorization": "OAuth {}".format(self.solomon_token)
        }
        payload = {
            'metrics': [
                {
                    'labels': {
                        'major_owners': 'pending',
                    },
                    'value': last_major_owner,
                },
            ],
        }
        try:
            # Without a timeout a stuck connection would block the task forever.
            resp = requests.post(
                solomon_url,
                params=solomon_params,
                json=payload,
                headers=solomon_headers,
                timeout=30,
            )
            resp.raise_for_status()
        except requests.RequestException as e:
            # Prefer the response body when the server answered; otherwise log the error itself.
            failed_response = getattr(e, "response", None)
            details = failed_response.text if failed_response is not None else e
            logging.error('Failed to push to solomon: %s', details)
        else:
            logging.info('Pushed result to solomon')

    def on_execute(self):
        """Task entry point: detect major owners and report the new ones."""
        logging.info("Started YabsAdfoxFillMajorOwners")

        possible_major_owners = self.get_possible_major_owners()
        current_major_owners = self.get_current_major_owners()
        self.notify_about_major_owners(possible_major_owners, current_major_owners)
        logging.info("Done")
