import urllib2
import time
import json


"""
    The original script lives in arcadia/search/tools/sepe/waitinfo.py.
    It queries metasearchers with a "/yandsearch?info=getwaitinfo" request
    and tries to guess the slowest underlying instances.
"""


class WaitInfo(object):
    """One parsed line of a metasearcher's ``info=getwaitinfo`` reply.

    A line is a tab-separated record; the fields are taken by position:

        0 -> time (int)        4 -> host
        1 -> (not used)        5 -> base_count (kept as a string)
        2 -> stage             6 -> source
        3 -> status            7 -> wait_time (int)

    Two records are equal (and hash the same) when they come from the same
    metasearch instance and have the same raw line, so the same record read
    in several polling sessions collapses to one element in a ``set``.
    """

    def __init__(self, mmeta_instance, s):
        """Parse the raw line ``s`` received from ``mmeta_instance``.

        Raises:
            IndexError: the line has fewer than eight tab-separated fields.
            ValueError: the time or wait_time field is not an integer.
        """
        self.mmeta_instance = mmeta_instance
        self.raw = s

        fields = s.split('\t')
        self.time = int(fields[0])
        # fields[1] is deliberately skipped: nothing in this class reads it.
        self.stage = fields[2]
        self.status = fields[3]
        self.host = fields[4]
        self.base_count = fields[5]
        self.source = fields[6]
        self.wait_time = int(fields[7])

    def __str__(self):
        return "{}\t{}".format(self.mmeta_instance, self.raw)

    def __eq__(self, other):
        if not isinstance(other, WaitInfo):
            return NotImplemented
        return (self.mmeta_instance, self.raw) == (other.mmeta_instance, other.raw)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        return hash((self.mmeta_instance, self.raw))

    def json(self):
        """Return the parsed fields as a plain dict (the raw line is omitted)."""
        return {
            'instance': self.mmeta_instance,
            'time': self.time,
            'stage': self.stage,
            'status': self.status,
            'base_count': self.base_count,
            'source': self.source,
            'wait_time': self.wait_time,
            'host': self.host,
        }

    @classmethod
    def merge(cls, lst):
        """Collapse a non-empty list of records into one dict.

        All fields are copied from the first record except ``wait_time``,
        which becomes the integer (floor) average over the whole list.

        Raises:
            IndexError: ``lst`` is empty.
        """
        res = lst[0].json()
        # wait_time values are ints, so // is what "/" already did on Python 2.
        res['wait_time'] = sum(one.wait_time for one in lst) // len(lst)
        return res

    @property
    def key(self):
        """Grouping key: every field except the instance, time and wait_time."""
        return self.stage, self.status, self.host, self.base_count, self.source


def _read_url(url, timeout):
    """Fetch ``url`` and return the response body, always closing the response."""
    response = urllib2.urlopen(url, timeout=timeout)
    try:
        return response.read()
    finally:
        response.close()


def get_results(expr, stage=None, sessions=5, timeout=60, sleep=5):
    """Poll metasearch instances for wait info and collect the recent records.

    ``expr`` is resolved to a list of instances through the clusterstate
    resolver. Every instance is then asked for
    ``/yandsearch?info=getwaitinfo`` once per session, and each non-empty
    line of the reply is parsed into a ``WaitInfo``.

    Args:
        expr: resolver term describing the instances to poll.
        stage: if set, only records with exactly this stage are kept.
        sessions: number of polling rounds.
        timeout: maximum record age in seconds; a record is kept only if its
            timestamp (treated as microseconds) is newer than now - timeout.
        sleep: pause in seconds between consecutive rounds.

    Returns:
        A ``(results, stats)`` tuple. ``results`` is a list of unique
        ``WaitInfo`` objects in first-seen order. ``stats`` is
        ``{'total': <instances resolved>, 'unanswered': <instances that failed>}``.
        The same shape is returned when the resolver yields no instances.

    Errors from the resolver request itself propagate to the caller.
    """
    resolve_url = "http://resolver.clusterstate.yandex-team.ru/v1?term={}".format(urllib2.quote(expr))
    instances = json.loads(_read_url(resolve_url, 5))["instances"]

    if not instances:
        # Same (results, stats) shape as the normal path so callers can
        # always unpack the return value.
        return [], {'total': 0, 'unanswered': 0}

    host_ports = ["{}:{}".format(instance["host"], instance["port"]) for instance in instances]
    total = set(host_ports)

    results = []
    failed = {}   # host:port -> error text; a failed instance is not retried
    seen = set()  # (host:port, raw line) pairs already examined

    for session in range(sessions):
        for host_port in host_ports:
            if host_port in failed:
                continue

            request = "http://{hp}/yandsearch?info=getwaitinfo".format(hp=host_port)
            try:
                reply = _read_url(request, 0.3)
            except Exception as e:
                # KeyboardInterrupt / SystemExit are not Exception subclasses,
                # so they still propagate and stop the polling.
                failed[host_port] = str(e)
                continue

            for line in reply.split('\n'):
                if not line:
                    continue
                record_id = (host_port, line)
                if record_id in seen:
                    # Already handled in an earlier session; a record that was
                    # rejected once (too old / wrong stage) stays rejected.
                    continue
                seen.add(record_id)

                try:
                    wi = WaitInfo(host_port, line)
                except (IndexError, ValueError):
                    # Best effort: one malformed line must not abort the poll.
                    continue

                is_recent = wi.time // 1000000 > time.time() - timeout
                if is_recent and (not stage or stage == wi.stage):
                    results.append(wi)

        if session < sessions - 1:
            time.sleep(sleep)

    return results, {'total': len(total), 'unanswered': len(failed)}


def quantile(data, level):
    """Return the ``wait_time`` found at the ``level``-th percentile of ``data``.

    The wait times are sorted ascending and the element at index
    ``int(level * n / 100.0)`` is returned, with the index clamped to the
    valid range so that ``level=100`` yields the maximum rather than
    running past the end of the list.

    Args:
        data: iterable of objects with an integer ``wait_time`` attribute.
        level: percentile, expected in the range 0..100.

    Raises:
        IndexError: ``data`` is empty.
    """
    wait_times = sorted(wi.wait_time for wi in data)
    if not wait_times:
        raise IndexError("quantile() of empty data")

    last = len(wait_times) - 1
    index = int(level * len(wait_times) / 100.0)
    # Clamp: level >= 100 would otherwise index one past the end.
    index = max(0, min(index, last))
    return wait_times[index]


def filter_results(data, quantile_level=95, top_size=30):
    """Pick the most frequent offenders among the slowest records.

    Records whose ``wait_time`` is strictly greater than the
    ``quantile_level`` percentile (see ``quantile``) are grouped by their
    ``key``. Each group is merged into one dict via ``WaitInfo.merge`` and
    given a ``cnt`` entry holding the group size.

    Because the comparison is strict, records equal to the threshold are
    dropped; when the threshold is the largest wait time in ``data`` the
    result is empty.

    Args:
        data: sequence of ``WaitInfo`` records; may be empty.
        quantile_level: percentile used as the slowness threshold.
        top_size: maximum number of groups to return.

    Returns:
        Up to ``top_size`` merged dicts, ordered by ``cnt`` descending.
    """
    groups = {}

    if data:
        threshold = quantile(data, quantile_level)
        for record in data:
            if record.wait_time > threshold:
                groups.setdefault(record.key, []).append(record)

    merged = [dict(WaitInfo.merge(group), cnt=len(group)) for group in groups.values()]
    merged.sort(key=lambda item: item['cnt'], reverse=True)
    return merged[:top_size]
