#!/skynet/python/bin/python
# coding=utf-8

# import json
import logging
import sys
import time
import multiprocessing
import collections
import optparse

# Message passed to the NotImplementedError raised by methods that subclasses must override.
NIE_MSG = "This method needs to be overwritten in subclass"


def print_float(format_str, v):
    """Format ``v`` with ``format_str`` and drop insignificant trailing zeros.

    :param format_str: a ``str.format`` template with a single replacement field,
        e.g. ``"{:7.2f}"``.
    :param v: the value to format.
    :return: the formatted text with the trailing zeros of the fractional part
        removed, together with a then-dangling decimal point
        (``"1.50"`` -> ``"1.5"``, ``"2.00"`` -> ``"2"``).
    """
    text = format_str.format(v)
    # Zeros are insignificant only after a decimal point; without this guard
    # an integer rendering such as "100" would be truncated to "1".
    if "." not in text:
        return text
    return text.rstrip("0").rstrip(".")


class Event(object):
    """A single log event wrapped around its raw sequence of fields.

    ``time`` is the first field converted to ``int`` and ``name`` is the third
    field. Indexing and ``len()`` are forwarded to the raw field sequence.
    """

    __slots__ = ("fields", "time", "name")

    def __init__(self, fields):
        self.fields = fields
        self.time = int(fields[0])
        self.name = fields[2]

    def __len__(self):
        """Return the number of raw fields."""
        return len(self.fields)

    def __getitem__(self, key):
        """Return the raw field stored at position ``key``."""
        return self.fields[key]


class ReqCtx(object):
    """Per-frame context object through which counters share data."""

    def __init__(self):
        # Starts out as None; counters assign and read it later.
        self.request_time = None


class CounterBase(object):
    """Root class of all counters: a counter reads the events of a frame and computes results."""

    def __init__(self):
        self.req_ctx = ReqCtx()

    def on_frame_start(self):
        """Called at the beginning of every frame; a no-op by default."""

    def handle(self, event):
        """Consume one event. Subclasses must override this method."""
        raise NotImplementedError(NIE_MSG)

    def get_results(self):
        """Produce the collected results. Subclasses must override this method."""
        raise NotImplementedError(NIE_MSG)


class RequestTime(CounterBase):
    """Measures the time between the ``ContextCreated`` and ``ReportPrinted`` events."""

    def __init__(self):
        super(RequestTime, self).__init__()
        self.start = None
        self.finish = None

    def handle(self, event):
        """Remember the timestamps of the boundary events."""
        event_name = event.name
        if event_name == "ContextCreated":
            self.start = event.time
        elif event_name == "ReportPrinted":
            self.finish = event.time

    def get_results(self):
        """Store the request time in the shared context and yield it.

        The value is ``None`` when either boundary timestamp is missing (or zero).
        """
        if self.finish and self.start:
            elapsed = self.finish - self.start
        else:
            elapsed = None
        self.req_ctx.request_time = elapsed
        yield ("RequestTime", "microseconds"), self.req_ctx.request_time


class YdoRequestTime(RequestTime):
    """RequestTime variant that also accepts ``PageRetrieved`` as the finishing event."""

    # Either of these events updates the finish timestamp.
    _FINISH_EVENTS = ("ReportPrinted", "PageRetrieved")

    def handle(self, event):
        """Remember the timestamps of the boundary events."""
        if event.name == "ContextCreated":
            self.start = event.time
        elif event.name in self._FINISH_EVENTS:
            self.finish = event.time


class RequestCounter(CounterBase):
    """Base for counters that are active only while the shared context has a truthy ``request_time``.

    Per the original author's note this restricts them to request frames
    (ReAsk, Push and other frames are skipped). Subclasses implement
    ``do_handle`` and ``do_get_results`` instead of ``handle`` / ``get_results``.
    """

    def do_handle(self, event):
        """Consume one event of a request frame. Must be overridden."""
        raise NotImplementedError(NIE_MSG)

    def do_get_results(self):
        """Produce results for a request frame. Must be overridden."""
        raise NotImplementedError(NIE_MSG)

    def handle(self, event):
        """Forward the event to ``do_handle`` unless ``request_time`` is falsy."""
        if not self.req_ctx.request_time:
            return
        self.do_handle(event)

    def get_results(self):
        """Yield everything from ``do_get_results`` unless ``request_time`` is falsy."""
        if not self.req_ctx.request_time:
            return
        for item in self.do_get_results():
            yield item


class MainTaskTime(RequestCounter):
    """Accumulates, per stage, the time between ``MainTaskStarted`` and ``MainTaskFinished``."""

    def __init__(self):
        super(MainTaskTime, self).__init__()
        self.tasks_time = collections.defaultdict(int)
        self.start_wait = None
        self.current_stage = None

    def do_handle(self, event):
        """Track the running main task and add its duration to the stage total."""
        if event.name == "MainTaskStarted":
            self.start_wait = event.time
            self.current_stage = event[4]
            return
        if event.name != "MainTaskFinished":
            return
        if self.start_wait and self.current_stage:
            self.tasks_time[self.current_stage] += event.time - self.start_wait

    def do_get_results(self):
        """Yield per-stage times plus the derived ``OtherTime`` and ``MetaSearchTime``."""
        times = self.tasks_time
        main_stages = ["search", "fetch", "allfactors"]

        for metric, stage in (("SearchTime", "search"),
                              ("FetchTime", "fetch"),
                              ("AllfactorsTime", "allfactors")):
            yield (metric, "microseconds"), times[stage]

        # This stage is reported, and counted as a main stage, only when it was seen.
        if "yandex_tier_stage" in times:
            main_stages.append("yandex_tier_stage")
            yield ("YandexTierWaitTime", "microseconds"), times["yandex_tier_stage"]

        waited_total = sum(times.values())
        waited_in_main_stages = sum([times[stage] for stage in main_stages])

        # Time of every stage that is not one of the main ones.
        yield ("OtherTime", "microseconds"), waited_total - waited_in_main_stages
        # Request time that was not spent inside any main task.
        yield ("MetaSearchTime", "microseconds"), self.req_ctx.request_time - waited_total


class CPUUsage(RequestCounter):
    """Reports the value carried by the last ``CPUUsage`` event of the frame."""

    def __init__(self):
        super(CPUUsage, self).__init__()
        self.cpu_usage = 0

    def do_handle(self, event):
        """Overwrite the stored value with field 3 of a ``CPUUsage`` event."""
        if event.name != "CPUUsage":
            return
        self.cpu_usage = int(event[3])

    def do_get_results(self):
        yield ("CPUUsage", "microseconds"), self.cpu_usage


class SubSourceCounterBase(CounterBase):
    """Base for sub source counters: classifies every sub source request by type.

    ``req_info`` maps ``(source_num, task_id)`` to the task description and the
    source name (from ``SubSourceInit``) and ``(source_num, attempt, task_id)``
    to the per-request data, including the computed ``req_type``.
    """

    def __init__(self):
        super(SubSourceCounterBase, self).__init__()
        self.req_info = collections.defaultdict(dict)
        self.is_reask = False

    @staticmethod
    def is_snippet_request(request):
        """Tell whether the CGI string ``request`` asks for snippets.

        The request must contain ``DF=da`` and must not contain ``smp=2``. When
        a ``diqm`` parameter is present, bit 4 of its first value decides;
        without ``diqm`` the request counts as a snippet request.
        """
        if "DF=da" not in request or "smp=2" in request:
            return False

        for param in request.split("&"):
            param_name, _, param_value = param.partition("=")
            if param_name == "diqm":
                return (int(param_value) & 4) > 0  # TDocInfoQueryModes::DI_QUERY_SNIPPETS
        return True

    def get_request_type(self, req_info, request):
        """Return the type label of a sub source request.

        :param req_info: dict with the ``task_descr`` / ``source_name`` of the task.
        :param request: the CGI string of the request.
        :return: ``"<SubSourceKind>_<source_name>"`` when the shared context has
            a truthy ``request_time``, otherwise ``"ReAskSubSource"`` or
            ``"UnknownSubSource"``.
        """
        task_descr = req_info.get("task_descr")
        source_name = req_info.get("source_name")

        if not self.req_ctx.request_time:
            return "ReAskSubSource" if self.is_reask else "UnknownSubSource"

        if task_descr == "aux":
            kind = "SubSourceAux"
        elif "dh=" not in request:
            kind = "SubSourceSearch"
        elif "allfctrs=da" in request:
            kind = "SubSourceAllfactors"
        elif "fetch_doc_data=da" in request:
            kind = "SubSourceFetchDocData"
        elif self.is_snippet_request(request):
            kind = "SubSourceSnippets"
        else:
            kind = "SubSourceOther"

        return kind + "_" + source_name

    def handle(self, event):
        """Record task info on ``SubSourceInit`` and the request type on ``SubSourceRequest``."""
        event_name = event.name
        if event_name == "CacheUpdateJobStarted":
            self.is_reask = True
        elif event_name == "SubSourceInit":
            task_key = (event[3], event[5])
            self.req_info[task_key].update(task_descr=event[7], source_name=event[6])
        elif event_name == "SubSourceRequest":
            source_num, attempt, request, task_id = event[3], event[4], event[7], event[8]
            req_type = self.get_request_type(self.req_info[(source_num, task_id)], request)
            self.req_info[(source_num, attempt, task_id)]["req_type"] = req_type

    def get_results(self):
        """Produce the collected results. Subclasses must override this method."""
        raise NotImplementedError(NIE_MSG)


class SubSourceTime(SubSourceCounterBase):
    """Collects per request type counts, request/reply sizes and response times."""

    def __init__(self):
        super(SubSourceTime, self).__init__()
        self.docs_count = collections.defaultdict(int)
        self.req_count = collections.defaultdict(int)
        self.req_len = collections.defaultdict(list)
        self.reply_len = collections.defaultdict(list)
        self.task_count = collections.defaultdict(int)
        self.req_times = collections.defaultdict(list)

    @staticmethod
    def get_doc_count(request):
        """Return the number of ``&dh=`` occurrences in the request string."""
        return request.count("&dh=")

    def handle(self, event):
        """Let the base class classify the event, then update the statistics."""
        super(SubSourceTime, self).handle(event)

        if event.name == "SubSourceRequest":
            self._on_request(event)
        elif event.name == "SubSourceOk":
            self._on_ok(event)

    def _on_request(self, event):
        """Account for one outgoing sub source request."""
        source_num, attempt, request, task_id = event[3], event[4], event[7], event[8]
        # The request body is an optional tenth field.
        request_body = event[9] if len(event) >= 10 else ""

        task_info = self.req_info[(source_num, task_id)]
        req_type = self.get_request_type(task_info, request)
        self.req_info[(source_num, attempt, task_id)]["start_time"] = event.time

        self.docs_count[req_type] += self.get_doc_count(request)
        self.req_count[req_type] += 1
        self.req_len[req_type].append(len(request) + len(request_body))

        # A task is counted once, on its first request (retries share the task entry).
        if not task_info.get("task_counted"):
            task_info["task_counted"] = True
            self.task_count[req_type] += 1

    def _on_ok(self, event):
        """Account for one successful reply to a previously seen request."""
        source_num, attempt, task_id = event[3], event[4], event[9]
        reply_len = int(event[10])

        key = (source_num, attempt, task_id)
        if key not in self.req_info:
            return
        info = self.req_info[key]

        # Prefer the duration carried by the event; otherwise measure it ourselves.
        if len(event) > 12:
            request_time = int(event[12])
        else:
            request_time = event.time - info["start_time"]

        self.req_times[info["req_type"]].append(request_time)
        self.reply_len[info["req_type"]].append(reply_len)

    def get_results(self):
        """Yield the counts (only when ``request_time`` is truthy) and all per-request samples."""
        if self.req_ctx.request_time:
            counters = (
                ("_Docs", self.docs_count),
                ("_Tasks", self.task_count),
                ("_Requests", self.req_count),
            )
            for suffix, counter in counters:
                for req_type, value in counter.items():
                    yield (req_type + suffix, "count"), value

        samples = (
            ("", "microseconds", self.req_times),
            ("_ReqLen", "bytes", self.req_len),
            ("_ReplyLen", "bytes", self.reply_len),
        )
        for suffix, unit, by_type in samples:
            for req_type, values in by_type.items():
                for value in values:
                    yield (req_type + suffix, unit), value


class SubSourceRequests(SubSourceCounterBase):
    """Extracts the incoming request (``ContextCreated``) and the first request of every sub source type.

    "First" means the request with the smallest
    ``(source_num, attempt, task_id)`` triple, compared as integers.
    """

    def __init__(self):
        super(SubSourceRequests, self).__init__()
        # Per-frame state is created here as well, so that handle() and
        # get_results() are safe even if on_frame_start() was never called.
        self.request_cgi = None
        self.subsource_requests_by_types = collections.defaultdict(list)

    def on_frame_start(self):
        """Reset the per-frame state."""
        self.request_cgi = None
        # subsource_type -> [list of (source_num, attempt, task_id)]
        self.subsource_requests_by_types = collections.defaultdict(list)

    def handle(self, event):
        """Remember the incoming CGI string and every sub source request URL."""
        super(SubSourceRequests, self).handle(event)

        if event.name == "ContextCreated":
            self.request_cgi = event[3]
            # Make request_time truthy so that get_request_type() does not return "UnknownSubSource".
            self.req_ctx.request_time = 1
        elif event.name == "SubSourceRequest":
            source_num = event[3]
            attempt = event[4]
            request = event[7]
            task_id = event[8]

            key = (source_num, attempt, task_id)
            info = self.req_info[key]

            info["request"] = request
            self.subsource_requests_by_types[info["req_type"]].append(key)

    @staticmethod
    def _request_order(request_key):
        """Sort key: the ``(source_num, attempt, task_id)`` triple as integers."""
        source_num, attempt, task_id = request_key
        return int(source_num), int(attempt), int(task_id)

    def get_results(self):
        """Yield one ``("SubSourceRequests", result)`` pair, or nothing without an incoming request.

        ``result`` holds the incoming CGI string, the first request URL (with
        its source number) of every sub source type and a debug frame id.
        """
        if self.request_cgi is None:
            return
        subsource_requests = {}
        # items() and a named key function (instead of iteritems() and a
        # tuple-unpacking lambda, which is Python-2-only syntax) keep this
        # method valid on both Python 2 and Python 3.
        for subsource_type, request_keys in self.subsource_requests_by_types.items():
            request_key = min(request_keys, key=self._request_order)
            request = self.req_info[request_key]["request"]
            subsource_requests[subsource_type] = {"url": request, "source_num": request_key[0]}
        result = {
            "subsource_requests": subsource_requests,
            "request_cgi": self.request_cgi,
            # frame_id is not assigned anywhere in this class (presumably the
            # frame driver sets it - TODO confirm); fall back to None so a
            # missing debug value cannot break result collection.
            "_debug_frame_id": getattr(self, "frame_id", None),
        }
        yield ("SubSourceRequests", result)


class SubSourceError(SubSourceCounterBase):
    """Counts failed sub source requests per request type."""

    def __init__(self):
        super(SubSourceError, self).__init__()
        self.error_requests_count = collections.defaultdict(int)

    def handle(self, event):
        """Count a ``SubSourceError`` event unless the request was canceled."""
        super(SubSourceError, self).handle(event)

        if event.name != "SubSourceError":
            return

        source_num, attempt, task_id = event[3], event[4], event[11]

        # Hedged requests are not counted as errors,
        # see the description of the SendingTimeout parameter at https://wiki.yandex-team.ru/jandekspoisk/sepe/metaconfdescription/
        if event[14] == 'Canceled':
            return

        info = self.req_info.get((source_num, attempt, task_id))
        if info is not None:
            self.error_requests_count[info["req_type"]] += 1

    def get_results(self):
        """Yield one ``(<req_type>_Error, "count"), 1`` item per counted error."""
        for req_type, error_count in self.error_requests_count.items():
            # A plain `yield (req_type + "_Error", "count"), error_count` is not possible:
            # COMPARE_MIDDLESEARCH_BINARIES would then have to compute
            # SubSourceTYPE_Error.Avg * SubSourceTYPE_Error.Count, and since
            # SubSourceTYPE_Error.Avg is rounded to an integer the result would be imprecise.
            # The statistic value (1) is arbitrary; any other number would do.
            metric = (req_type + "_Error", "count")
            for _ in range(error_count):
                yield metric, 1


class RearrStat(object):
    """Timing and document statistics of one rearrange rule.

    ``total_time`` accumulates the measured intervals, ``docs_removed`` keeps
    the largest drop in document count seen over a start/stop pair, and the
    two ``*_async_time`` fields accumulate the registered asynchronous times.
    """

    __slots__ = ("start_time", "total_time", "docs_before", "docs_removed", "opened", "suspended", "total_async_time", "self_async_time")

    def __init__(self):
        # State of the interval that is currently being measured.
        self.opened = None
        self.suspended = None
        self.start_time = None
        self.docs_before = None
        # Accumulated results.
        self.total_time = 0
        self.docs_removed = None
        self.total_async_time = 0
        self.self_async_time = 0

    def __repr__(self):
        template = "{} (opened = {}, suspended = {})"
        return template.format(self.start_time, self.opened, self.suspended)

    def fill(self, event, is_stop):
        """Process a start (``is_stop`` falsy) or a stop (``is_stop`` truthy) event.

        Field 5 of the event is the document count. A stop event is ignored
        when no interval is being measured.
        """
        doc_count = int(event[5])
        if not is_stop:
            self.fill_start(doc_count, event)
            return

        if self.start_time is None or self.docs_before is None:
            return

        self.total_time += (event.time - self.start_time)
        removed_now = self.docs_before - doc_count
        self.docs_removed = max(self.docs_removed or 0, removed_now)

        self.start_time = None
        self.docs_before = None
        self.opened = False

    def fill_start(self, doc_count, event):
        """Open a new interval at the time of ``event``."""
        self.opened = True
        self.docs_before = doc_count
        self.start_time = event.time

    def suspend(self, event):
        """Close the running interval at ``event`` and remember the suspension time."""
        elapsed = event.time - self.start_time
        self.total_time += elapsed
        self.start_time = None
        self.suspended = event.time

    def unsuspend(self, event):
        """Resume measuring from the time of ``event``."""
        self.suspended = None
        self.start_time = event.time

    def register_async_time(self, total_async_time, self_async_time):
        """Add the given values to the accumulated asynchronous times."""
        self.total_async_time += total_async_time
        self.self_async_time += self_async_time


class Rearranges(RequestCounter):
    """Per-rule rearrange statistics (time, async time, removed documents) for every stage.

    Field 3 of a ``RearrangeInfo`` / ``RearrangeAsyncTaskStats`` event is a
    code that ``divmod(code, 2)`` splits into the stage index and a stop flag
    (remainder 1 means "stop"); field 4 is the rule name.
    """

    STAGES = {
        # with respect to search/meta/rearrange/common/rule_phase.h
        # The trailing number is the even event code that maps to the list index.
        "middle": [
            "Merge",                       # 0
            "Fetch",                       # 2
            "unused",                      # 4
            "unused",                      # 6
            "unused",                      # 8
            "unused",                      # 10
            "unused",                      # 12
            "BeforeFetch",                 # 14
            "AfterSearch",                 # 16
            "unused",                      # 18
            "unused",                      # 20
            "RearrangeAfterMetaFeatures",  # 22
            "RearrangeAfterMetaRank",      # 24
        ],
    }

    def __init__(self, mode):
        """
        :param mode: key of ``STAGES`` that selects the stage list.
        :raises KeyError: when ``mode`` is not a key of ``STAGES``.
        """
        # super() must name this class; naming RequestCounter would start the
        # MRO lookup after it and skip any initialisation it defines.
        super(Rearranges, self).__init__()
        self.group_names = collections.deque()
        self.rearr_suspend = collections.defaultdict(bool)
        self.mode = mode
        # One {rule name -> RearrStat} mapping per stage.
        self.rearrs = [collections.defaultdict(RearrStat) for _ in self.STAGES[mode]]

    def do_handle(self, event):
        """Update the statistics of the rule named in the event."""
        if event.name == "RearrangeInfo":
            name = event[4]
            stage_num, is_stop = divmod(int(event[3]), 2)
            self.rearrs[stage_num][name].fill(event, is_stop)
        elif event.name == "RearrangeAsyncTaskStats":
            name = event[4]
            stage_num, is_stop = divmod(int(event[3]), 2)
            # Async times are registered from "stop" events only.
            if is_stop == 1:
                self.rearrs[stage_num][name].register_async_time(int(event[5]), int(event[6]))

    def do_get_results(self):
        """Yield the total time of every rule and, when non-zero, its async times and removed documents."""
        for stage_num, stage in enumerate(self.STAGES[self.mode]):
            # items() works on both Python 2 and Python 3 (iteritems() does not).
            for r, s in self.rearrs[stage_num].items():
                yield ("-".join([r, stage]), "microseconds"), s.total_time
                if (s.total_async_time) and (s.self_async_time is not None):
                    yield ("-".join([r, stage, "TotalAsyncTime"]), "microseconds"), s.total_async_time
                    yield ("-".join([r, stage, "SelfAsyncTime"]), "microseconds"), s.self_async_time
                if s.docs_removed:
                    yield ("-".join([r, "DocsRemoved", stage]), "counter"), s.docs_removed


class RearrangeAdjusterStat(object):
    """Holds the time, in microseconds, taken from the most recent event passed to ``fill``."""

    __slots__ = ("total_time_microseconds",)

    def __init__(self):
        self.total_time_microseconds = 0

    def __repr__(self):
        template = "(total_time_microseconds = {})"
        return template.format(self.total_time_microseconds)

    def fill(self, event):
        """Replace the stored time with field 4 of ``event`` (converted to ``int``)."""
        self.total_time_microseconds = int(event[4])


class RearrangeAdjusterCounter(RequestCounter):
    """Reports, per rule, the time carried by the adjuster-stage events listed in ``STAGES``."""

    STAGES = {
        'middle': [
            'PrepareRearrangeParams',
            'AdjustClientParams',
            'AdjustFactorRequests',
        ]
    }

    def __init__(self, mode):
        super(RearrangeAdjusterCounter, self).__init__()
        self.group_names = collections.deque()
        self.rearr_suspend = collections.defaultdict(bool)
        self.mode = mode
        self.stages = self.STAGES[mode]
        # stage name -> {rule name -> RearrangeAdjusterStat}
        self.rearrange_stats = dict(
            (stage, collections.defaultdict(RearrangeAdjusterStat)) for stage in self.stages
        )

    def do_handle(self, event):
        """Store the event in the statistics of the rule named in field 3."""
        if event.name not in self.stages:
            return
        rule_name = event[3]
        self.rearrange_stats[event.name][rule_name].fill(event)

    def do_get_results(self):
        """Yield ``<rule>-<stage>`` times in the order of the configured stages."""
        for stage in self.stages:
            stats_by_rule = self.rearrange_stats[stage]
            for rule_name, stat in stats_by_rule.iteritems():
                yield ("-".join([rule_name, stage]), "microseconds"), stat.total_time_microseconds


class MetaFeatureStat(object):
    """Timing record of one meta feature; every field starts out as ``None``."""

    __slots__ = ("start_time", "total_time", "total_async_time", "self_async_time")

    def __init__(self):
        for field in MetaFeatureStat.__slots__:
            setattr(self, field, None)


class MetaFeatures(RequestCounter):
    """Collects calculation times of meta features, keyed by the feature name (field 3)."""

    def __init__(self):
        super(MetaFeatures, self).__init__()
        self.feature_stat = collections.defaultdict(MetaFeatureStat)

    def do_handle(self, event):
        """Update the statistics of the feature named in the event."""
        kind = event.name
        if kind == "StartCalcMetaFeature":
            name = event[3]
            self.feature_stat[name].start_time = event.time
        elif kind == "StopCalcMetaFeature":
            name = event[3]
            stat = self.feature_stat[name]
            if stat.start_time:
                # Add the interval that has just finished to the running total.
                elapsed = event.time - stat.start_time
                stat.total_time = (stat.total_time or 0) + elapsed
                stat.start_time = None
            stat.total_async_time = int(event[4])
            stat.self_async_time = int(event[5])
        elif kind == "ParallelCalcFeaturesTask":
            name = event[3]
            stat = self.feature_stat[name]
            # This event carries all three values itself.
            stat.total_time = int(event[4])
            stat.total_async_time = int(event[5])
            stat.self_async_time = int(event[6])

    def do_get_results(self):
        """Yield the total time of each feature and, when non-zero, its async times."""
        for feature, stat in self.feature_stat.items():
            if stat.total_time is not None:
                yield (feature + "-Feature", "microseconds"), stat.total_time
            if stat.total_async_time and stat.self_async_time is not None:
                yield ("-".join([feature, "Feature", "TotalAsyncTime"]), "microseconds"), stat.total_async_time
                yield ("-".join([feature, "Feature", "SelfAsyncTime"]), "microseconds"), stat.self_async_time


class UnanswersPercent(RequestCounter):
    """Percentage of unanswers among the totals reported by ``RequestStat`` events of the ``0_TIER`` group."""

    def __init__(self):
        super(UnanswersPercent, self).__init__()
        self.total_count = 0
        self.unanswer_count = 0

    def do_handle(self, event):
        """Add fields 3 (total) and 4 (unanswers) of a ``0_TIER`` ``RequestStat`` event."""
        if event.name != "RequestStat" or event[7] != "0_TIER":
            return
        self.total_count += int(event[3])
        self.unanswer_count += int(event[4])

    def do_get_results(self):
        """Yield the percentage; nothing is yielded when the total is zero."""
        if self.total_count <= 0:
            return
        yield ("UnanswersPercent", "%"), 100.0 * self.unanswer_count / self.total_count


class ErrorMessage(RequestCounter):
    """Counts the ``ErrorMessage`` events of a frame."""

    def __init__(self):
        super(ErrorMessage, self).__init__()
        self.error_message_count = 0

    def do_handle(self, event):
        if event.name != "ErrorMessage":
            return
        self.error_message_count += 1

    def do_get_results(self):
        """Yield the count; nothing is yielded when no such event was seen."""
        if self.error_message_count <= 0:
            return
        yield ("ErrorMessage", "count"), self.error_message_count


class DeltaCounter(RequestCounter):
    """Sums the time between each ``first_event`` and the ``second_event`` that follows it.

    Subclasses provide ``name`` (the reported metric), ``first_event`` and
    ``second_event``. A repeated ``first_event`` restarts the running
    measurement; a ``second_event`` without a pending start is ignored.
    """

    def __init__(self):
        super(DeltaCounter, self).__init__()
        self.time_sum = 0
        self.start = None

    @property
    def name(self):
        """Name of the reported metric. Must be overridden."""
        raise NotImplementedError(NIE_MSG)

    @property
    def first_event(self):
        """Name of the event that starts a measurement. Must be overridden."""
        raise NotImplementedError(NIE_MSG)

    @property
    def second_event(self):
        """Name of the event that ends a measurement. Must be overridden."""
        raise NotImplementedError(NIE_MSG)

    def do_handle(self, event):
        if event.name == self.first_event:
            self.start = event.time
            return
        if event.name != self.second_event or self.start is None:
            return
        self.time_sum += (event.time - self.start)
        self.start = None

    def do_get_results(self):
        """Yield the accumulated time; nothing is yielded when it is zero."""
        if not self.time_sum:
            return
        yield (self.name, "microseconds"), self.time_sum


class StageInfo(object):
    """One run of a stage: its grouping stack, start/end timestamps and per-rule statistics."""

    NO_GROUPING = "NO_GROUPING"

    def __init__(self, groupings, start_stage):
        # A snapshot of the groupings, taken when the stage starts.
        self.stage_groupings = tuple(groupings)
        self.start_stage = start_stage
        self.end_stage = None
        self.nested_time = 0
        self.rearrs = collections.defaultdict(RearrStat)

    def total_time(self):
        """Return the time between the start and the end of the stage."""
        return self.end_stage - self.start_stage

    def curr_grouping(self):
        """Return the innermost grouping, or ``NO_GROUPING`` when there is none."""
        if self.stage_groupings:
            return self.stage_groupings[-1]
        return self.NO_GROUPING

    def __repr__(self):
        moment = self.end_stage or self.start_stage
        indent = "  " * len(self.stage_groupings)
        return str(moment) + indent + str(self.curr_grouping())


class FullDeltaCounter(RequestCounter):
    """Per-stage and per-rule rearrange timings with nesting support (SEARCH-4142).

    Every ``Start<stage>`` event pushes a new ``StageInfo`` on the stack of
    that stage and every ``Stop<stage>`` event pops it into the list of
    completed infos. ``RearrangeInfo`` events update the rule statistics of
    the innermost open ``StageInfo`` of their stage. Rules that are running
    when a nested stage starts are suspended until that nested stage stops.
    """
    # Stage list used when the counter is created with a truthy old_stages_order.
    OLD_STAGES = [
        "RearrangeAfterMerge",
        "RearrangeAfterFetch",
        "RearrangeAfterBlenderFactors",
        "RearrangeAfterIntentWeights",
        "RearrangeAfterBlend"
    ]
    # Default stage list; the position in the list is the stage number.
    STAGES = [
        "RearrangeAfterMerge",
        "RearrangeAfterFetch",
        "CalcBlenderFeatures",
        "CalcBlenderMetaFeatures",
        "RearrangeAfterBlenderFactors",
        "RearrangeAfterIntentWeights",
        "RearrangeAfterBlend",
        "RearrangeBeforeFetch",
        "RearrangeAfterMetaFeatures",
        "RearrangeAfterMetaRank",
    ]
    # STARTS = frozenset("Start{}".format(s) for s in STAGES)
    # ENDS = frozenset("Stop{}".format(s) for s in STAGES)
    START_MERGE = "StartMerge"
    STOP_MERGE = "StopMerge"

    def __init__(self, by_grouping, old_stages_order):
        """
        :param by_grouping: when truthy, the per-rule metric names also include the grouping.
        :param old_stages_order: when truthy, ``OLD_STAGES`` is used instead of ``STAGES``.
        """
        super(FullDeltaCounter, self).__init__()
        self.stages = self.OLD_STAGES if old_stages_order else self.STAGES
        # Names of the events that open / close a stage.
        self.starts = frozenset("Start{}".format(s) for s in self.stages)
        self.ends = frozenset("Stop{}".format(s) for s in self.stages)
        # Per stage: the stack of grouping names collected from StartMerge events.
        self.groupings = [collections.deque() for _ in self.stages]
        # Per stage: the stack of currently open StageInfo objects.
        self.stage_infos = [collections.deque() for _ in self.stages]
        # Per stage: the StageInfo objects whose Stop event has been seen.
        self.completed_stage_infos = [[] for _ in self.stages]
        self.by_grouping = by_grouping

    def do_handle(self, event):
        """Dispatch the event to the start / rule-info / end handlers."""
        if event.name == self.START_MERGE:
            # Field 6 of StartMerge is pushed on the grouping stack of every stage.
            for group_names in self.groupings:
                group_names.append(event[6])
            # print "  " * len(self.groupings[0]), self.groupings
        # Each handler checks the event name itself and ignores unrelated events.
        self._on_start_rearr(event)
        self._on_rearr_info(event)
        self._on_end_rearr(event)

    def _on_start_rearr(self, event):
        """On ``Start<stage>``: suspend the running rules and open a new ``StageInfo``."""
        if event.name in self.starts:
            stage_num = self._get_stage_num(event.name)
            self._suspend_rearrs(stage_num, event)
            # StageInfo stores a tuple snapshot of the current grouping stack.
            stage_info = StageInfo(self.groupings[stage_num], event.time)
            self.stage_infos[stage_num].append(stage_info)
            # print stage_info, event.name

    def _on_rearr_info(self, event):
        """On ``RearrangeInfo``: update the rule statistics of the innermost open stage."""
        if event.name == "RearrangeInfo":
            rearr_name = event[4]
            # Field 3 encodes the stage number and the stop flag (remainder 1 means stop).
            stage_num, is_stop = divmod(int(event[3]), 2)
            # NOTE(review): raises IndexError when no StageInfo is open for this stage
            # or when stage_num is outside of the configured stage list.
            self.stage_infos[stage_num][-1].rearrs[rearr_name].fill(event, is_stop)

    def _on_end_rearr(self, event):
        """On ``Stop<stage>``: complete the innermost ``StageInfo`` and resume the outer rules."""
        if event.name in self.ends:
            stage_num = self._get_stage_num(event.name)
            # NOTE(review): raises IndexError on a Stop event that has no matching Start.
            self.stage_infos[stage_num][-1].end_stage = event.time
            # print self.stage_infos[stage_num][-1], event.name
            stage_info = self.stage_infos[stage_num].pop()
            self.completed_stage_infos[stage_num].append(stage_info)
            # Drop the innermost grouping of this stage, if there is one.
            if self.groupings[stage_num]:
                self.groupings[stage_num].pop()
            self._unsuspend_rearrs(stage_num, event)

    def _suspend_rearrs(self, stage_num, event):
        """Suspend every opened, not yet suspended rule of the innermost open ``StageInfo``."""
        if not self.stage_infos[stage_num]:
            return
        for k, r in self.stage_infos[stage_num][-1].rearrs.iteritems():
            if r.opened and not r.suspended:
                r.suspend(event)
                # print event.time, "Suspend", k, self.stage_infos[stage_num][-1]

    def _unsuspend_rearrs(self, stage_num, event):
        """Resume every opened and suspended rule of the innermost open ``StageInfo``."""
        if not self.stage_infos[stage_num]:
            return
        for k, r in self.stage_infos[stage_num][-1].rearrs.iteritems():
            # print "Try to unsuspend"
            # print self.stage_infos[stage_num][-1].rearrs
            if r.opened and r.suspended:
                # print "Unsuspend", k
                r.unsuspend(event)

    def _get_stage_num(self, event_name):
        """Return the index of the first stage whose name ends ``event_name``.

        Implicitly returns ``None`` when no stage matches.
        """
        for i, stage_name in enumerate(self.stages):
            if event_name.endswith(stage_name):
                return i

    def do_get_results(self):
        """Yield stage times, stage overheads and the aggregated per-rule times.

        For every completed ``StageInfo`` this yields ``<stage>-<grouping>``
        (total time) and ``<stage>-<grouping>-Overhead`` (total time minus the
        time of all its rules). After all infos of a stage it yields the rule
        times summed by ``<rule>-<stage>`` (plus ``-<grouping>`` when
        ``by_grouping`` is set).
        """
        for i, infos_by_stage in enumerate(self.completed_stage_infos):
            # metric name -> summed rule time over all runs of this stage
            all_rearrs_aggregated = collections.defaultdict(int)
            for stage_info in infos_by_stage:
                curr_g = str(stage_info.curr_grouping())
                yield (self.stages[i] + "-" + curr_g, "microseconds"), stage_info.total_time()
                all_rearr_time = 0
                for r_name, r_info in stage_info.rearrs.iteritems():
                    rearr_stat_list_name = [r_name, self.stages[i]]
                    if self.by_grouping:
                        rearr_stat_list_name.append(curr_g)
                    rearr_stat_name = "-".join(rearr_stat_list_name)
                    all_rearrs_aggregated[rearr_stat_name] += r_info.total_time
                    all_rearr_time += r_info.total_time
                yield (self.stages[i] + "-" + curr_g + "-Overhead", "microseconds"), stage_info.total_time() - all_rearr_time
            # Note: this loop reuses the name `i`; the outer loop rebinds it on its next iteration.
            for i in all_rearrs_aggregated.iteritems():
                yield i


class CalcPrepareFactorRequests(DeltaCounter):
    """Time from ``PrepareAllfactorsRequestsStarted`` to ``PrepareAllfactorsRequestsFinished``, reported as ``PrepareFactorRequests``."""

    name = "PrepareFactorRequests"
    first_event = "PrepareAllfactorsRequestsStarted"
    second_event = "PrepareAllfactorsRequestsFinished"


class CalcMetaRelevance(DeltaCounter):
    """Time from ``StartCalcMetaRelevance`` to ``StopCalcMetaRelevance`` plus the async times of the stop event."""

    name = "CalcMetaRelevance"
    first_event = "StartCalcMetaRelevance"
    second_event = "StopCalcMetaRelevance"

    def __init__(self):
        super(CalcMetaRelevance, self).__init__()
        self.total_async_time = None
        self.self_async_time = None

    def do_handle(self, event):
        """Accumulate the elapsed time and keep the async times of the last stop event."""
        super(CalcMetaRelevance, self).do_handle(event)
        if event.name != self.second_event:
            return
        self.total_async_time = int(event[3])
        self.self_async_time = int(event[4])

    def do_get_results(self):
        """Yield the base class results, then the async times when the total one is non-zero."""
        for item in super(CalcMetaRelevance, self).do_get_results():
            yield item
        if self.total_async_time and self.self_async_time is not None:
            async_times = (
                ("TotalAsyncTime", self.total_async_time),
                ("SelfAsyncTime", self.self_async_time),
            )
            for label, value in async_times:
                yield ("-".join([self.name, label]), "microseconds"), value


class CalcMetaFeatures(DeltaCounter):
    """Time from ``StartCalcMetaFeatures`` to ``StopCalcMetaFeatures``, reported as ``CalcMetaFeatures``."""

    name = "CalcMetaFeatures"
    first_event = "StartCalcMetaFeatures"
    second_event = "StopCalcMetaFeatures"


class MetaRank(DeltaCounter):
    """Time from ``StartMetaRank`` to ``StopMetaRank``, reported as ``MetaRank``."""

    name = "MetaRank"
    first_event = "StartMetaRank"
    second_event = "StopMetaRank"


class RearrangeAfterMerge(DeltaCounter):
    """Time from ``StartRearrangeAfterMerge`` to ``StopRearrangeAfterMerge``, reported as ``RearrangeAfterMerge``."""

    name = "RearrangeAfterMerge"
    first_event = "StartRearrangeAfterMerge"
    second_event = "StopRearrangeAfterMerge"


class RearrangeOverhead(RequestCounter):
    """Accumulates the time spent inside a rearrange stage between matching start and stop marks.

    Measuring starts at ``StartRearrange<suffix>`` or at a ``RearrangeInfo``
    event whose code (field 3) is ``rearr_info_shift + 1``. It stops at
    ``StopRearrange<suffix>`` or at a ``RearrangeInfo`` event whose code is
    ``rearr_info_shift``. Presumably the odd code marks the end of a rule and
    the even one its start, so the sum is the stage time spent outside of the
    rules - TODO confirm against the event producer.
    """

    def __init__(self, suffix):
        """
        :param suffix: the stage suffix, e.g. ``"AfterMerge"``; it selects the
            start/stop event names and the ``RearrangeInfo`` code of the stage.
        """
        super(RearrangeOverhead, self).__init__()
        self.suffix = suffix
        self.start_event = "StartRearrange" + suffix
        self.stop_event = "StopRearrange" + suffix
        # None when the suffix has no known RearrangeInfo code.
        self.rearr_info_shift = {
            "AfterMerge": 0,
            "AfterFetch": 2,
            "BeforeFetch": 14,
            "AfterMetaFeatures": 22,
            "AfterMetaRank": 24,
        }.get(suffix)

        self.accumulator = 0
        self.prev_time = None
        self.has_results = False

    def _is_rearr_info(self, event, offset):
        """Tell whether ``event`` is a ``RearrangeInfo`` with the code ``rearr_info_shift + offset``."""
        # Without a known code no RearrangeInfo event can match; this also
        # avoids the TypeError that ``None + offset`` would raise.
        if event.name != "RearrangeInfo" or self.rearr_info_shift is None:
            return False
        return int(event[3]) == self.rearr_info_shift + offset

    def overhead_start(self, event):
        """Return True when ``event`` starts an interval."""
        return event.name == self.start_event or self._is_rearr_info(event, 1)

    def overhead_stop(self, event):
        """Return True when ``event`` ends the interval that is being measured."""
        # Compare with None explicitly so that a start timestamp of 0 is not
        # mistaken for "nothing started" (DeltaCounter treats its start the same way).
        if self.prev_time is None:
            return False
        return event.name == self.stop_event or self._is_rearr_info(event, 0)

    def do_handle(self, event):
        """Open an interval on a start mark, or close it and add its length on a stop mark."""
        if self.overhead_start(event):
            self.prev_time = event.time
        elif self.overhead_stop(event):
            self.has_results = True
            self.accumulator += (event.time - self.prev_time)
            self.prev_time = None

    def do_get_results(self):
        """Yield the accumulated time, but only if at least one interval was closed."""
        if self.has_results:
            yield ("Rearrange" + self.suffix + "Overhead", "microseconds"), self.accumulator


class AfterMergeRearrangeOverhead(RearrangeOverhead):
    """RearrangeOverhead bound to the ``AfterMerge`` suffix."""

    def __init__(self):
        super(AfterMergeRearrangeOverhead, self).__init__("AfterMerge")


class BeforeFetchRearrangeOverhead(RearrangeOverhead):
    """RearrangeOverhead bound to the ``BeforeFetch`` suffix."""

    def __init__(self):
        super(BeforeFetchRearrangeOverhead, self).__init__("BeforeFetch")


class AfterFetchRearrangeOverhead(RearrangeOverhead):
    """RearrangeOverhead bound to the ``AfterFetch`` suffix."""

    def __init__(self):
        super(AfterFetchRearrangeOverhead, self).__init__("AfterFetch")


class AfterMetaFeaturesRearrangeOverhead(RearrangeOverhead):
    """RearrangeOverhead bound to the ``AfterMetaFeatures`` suffix."""

    def __init__(self):
        super(AfterMetaFeaturesRearrangeOverhead, self).__init__("AfterMetaFeatures")


class AfterMetaRankRearrangeOverhead(RearrangeOverhead):
    """RearrangeOverhead bound to the ``AfterMetaRank`` suffix."""

    def __init__(self):
        super(AfterMetaRankRearrangeOverhead, self).__init__("AfterMetaRank")


class RearrangeBeforeFetch(DeltaCounter):
    """Time from ``StartRearrangeBeforeFetch`` to ``StopRearrangeBeforeFetch``, reported as ``RearrangeBeforeFetch``."""

    name = "RearrangeBeforeFetch"
    first_event = "StartRearrangeBeforeFetch"
    second_event = "StopRearrangeBeforeFetch"


class RearrangeAfterFetch(DeltaCounter):
    """Time from ``StartRearrangeAfterFetch`` to ``StopRearrangeAfterFetch``, reported as ``RearrangeAfterFetch``."""

    name = "RearrangeAfterFetch"
    first_event = "StartRearrangeAfterFetch"
    second_event = "StopRearrangeAfterFetch"


class RearrangeAfterMetaFeatures(DeltaCounter):
    """Time from ``StartRearrangeAfterMetaFeatures`` to ``StopRearrangeAfterMetaFeatures``, reported as ``RearrangeAfterMetaFeatures``."""

    name = "RearrangeAfterMetaFeatures"
    first_event = "StartRearrangeAfterMetaFeatures"
    second_event = "StopRearrangeAfterMetaFeatures"


class RearrangeAfterMetaRank(DeltaCounter):
    """Time from ``StartRearrangeAfterMetaRank`` to ``StopRearrangeAfterMetaRank``, reported as ``RearrangeAfterMetaRank``."""

    name = "RearrangeAfterMetaRank"
    first_event = "StartRearrangeAfterMetaRank"
    second_event = "StopRearrangeAfterMetaRank"


class CacheLoad(DeltaCounter):
    """Time from ``ContextCreated`` to ``CachedResultStat``, reported as ``CacheLoad``."""

    name = "CacheLoad"
    first_event = "ContextCreated"
    second_event = "CachedResultStat"


class CacheLoadV2(DeltaCounter):
    """DeltaCounter configured for TryLoadFromCache -> CachedResultStat.

    Differs from CacheLoad only in the starting event.

    NOTE(review): DeltaCounter is defined outside this view; presumably it
    reports the time between first_event and second_event -- confirm.
    """

    name = "CacheLoadV2"
    first_event = "TryLoadFromCache"
    second_event = "CachedResultStat"


class PrepareClients(DeltaCounter):
    """DeltaCounter configured for StartPrepareClients -> StopPrepareClients.

    NOTE(review): DeltaCounter is defined outside this view; presumably it
    reports the time between first_event and second_event -- confirm.
    """

    name = "PrepareClients"
    first_event = "StartPrepareClients"
    second_event = "StopPrepareClients"


class PrintReport(DeltaCounter):
    """DeltaCounter configured for PageRetrieved -> ReportPrinted.

    NOTE(review): DeltaCounter is defined outside this view; presumably it
    reports the time between first_event and second_event -- confirm.
    """

    # NOTE: the counter name is "ReportPrinted", not the class name "PrintReport".
    name = "ReportPrinted"
    first_event = "PageRetrieved"
    second_event = "ReportPrinted"


class Merge(DeltaCounter):
    """DeltaCounter configured for StartMergeGrouping -> StopMergeGrouping.

    NOTE(review): DeltaCounter is defined outside this view; presumably it
    reports the time between first_event and second_event -- confirm.
    """

    name = "Merge"
    first_event = "StartMergeGrouping"
    second_event = "StopMergeGrouping"


class ExtraMergeCycles(RequestCounter):
    """Counts "StartMerge" events and reports the count when there is more than one."""

    def __init__(self):
        super(ExtraMergeCycles, self).__init__()
        self.merge_counter = 0

    def do_handle(self, event):
        """Increment the merge counter on every "StartMerge" event."""
        if event.name != "StartMerge":
            return
        self.merge_counter += 1

    def do_get_results(self):
        """Yield the total number of merges, but only if more than one was seen."""
        if self.merge_counter <= 1:
            return
        yield ("ExtraMergeCycles", "counter"), self.merge_counter


class Histogram(object):
    """Text histogram over an ascending-sorted list of numeric values.

    After construction ``lines`` holds one tuple per bucket:
    ``(lower, upper, bucket_count, accumulated_count, bucket_percent,
    remaining_percent)`` where ``lower``/``upper`` are already formatted
    strings and ``remaining_percent`` is the share of values not yet covered
    by this or any earlier bucket.
    """

    def __init__(self, value_name, sorted_values, options):
        """Build the bucket lines.

        :param value_name: counter name, used to look up an override in ``options.hist``
        :param sorted_values: values sorted in ascending order
        :param options: object whose ``hist`` maps counter name to ``(step, upper_bound)``
        :raises ValueError: if the bucket step is not positive
        """
        self.lines = []

        count = len(sorted_values)
        if count == 0:
            return

        if value_name in options.hist:
            # Explicit override: buckets start at zero and use the given step/upper bound.
            min_val = 0
            max_val = options.hist[value_name][1]
            step = options.hist[value_name][0]
        else:
            # Automatic range: from the minimum up to the value at the 99% position.
            min_val = sorted_values[0]
            max_val = sorted_values[int(0.99*count)]
            step = self.choose_step(min_val, max_val)

        if step <= 0:
            # A zero step divides by zero below; a negative one never terminates the loop.
            raise ValueError(
                "Histogram step for {!r} must be positive, got {!r}".format(value_name, step)
            )

        bounds = []
        bound = int(min_val/step)*step
        while bound <= max_val:
            bounds.append(bound)
            bound += step
        # Closing bound of the last bucket; values >= max_val + 1 are counted in no
        # bucket and only show up in the "remaining percent" column.
        bounds.append(max_val + 1)

        acc_count = 0
        j = 0
        for a, b in zip(bounds, bounds[1:]):
            bucket_count = 0
            # sorted_values is ascending, so one forward-only cursor serves all buckets.
            while j < count and sorted_values[j] < b:
                bucket_count += 1
                j += 1

            a = print_float("{:7.2f}", a)
            if b > max_val:
                b = "..."
            else:
                b = print_float("{:7.2f}", b)

            acc_count += bucket_count

            self.lines.append((a, b, bucket_count, acc_count, 100.0*bucket_count/count, 100.0*(count - acc_count)/count))

    def dump(self, ostream):
        """Print one formatted line per bucket to ``ostream``, with a bar of ``*`` (one per whole percent)."""
        for a, b, count, sum_count, p, sum_p in self.lines:
            line = "{:<20}: {:<10} {:<10} {:<10.1f} {:<10.1f} {}".format(
                "[{:>7}, {:>7})".format(a, b), count, sum_count, p, sum_p, '*'*int(p)
            )
            print >> ostream, line

    @staticmethod
    def choose_step(min_val, max_val):
        """Return the smallest step of the form 1, 2 or 5 times a power of ten (>= 0.001)
        that is not less than one tenth of the ``[min_val, max_val]`` range."""
        steps = [0.001, 0.002, 0.005]

        delta = max(steps[0], (max_val - min_val)/10.0)
        multiplier = 1
        while True:
            for r in steps:
                next_step = r*multiplier
                if next_step >= delta:
                    return next_step

            multiplier *= 10


class Stat(object):
    """Summary statistics plus a histogram for one named series of values.

    ``result`` maps every name from ``options.stat_names`` to its computed
    value (``None`` for every name when the series is empty).
    """

    def __init__(self, value_name, dimension, value_list, options):
        """Sort ``value_list`` in place and compute the requested statistics.

        :param value_name: counter name, forwarded to Histogram
        :param dimension: unit label stored as-is
        :param value_list: list of numeric values; NOTE: it is sorted in place
        :param options: object providing ``stat_names`` (and ``hist`` for Histogram)
        """
        self.dimension = dimension
        self.result = {n: None for n in options.stat_names}
        self.options = options

        value_list.sort()
        self.hist = Histogram(value_name, value_list, options)

        if value_list:
            for stat_name in options.stat_names:
                self.result[stat_name] = self.calc_stat(stat_name, value_list)

    def __getitem__(self, key):
        """Return the statistic ``key``, or -1 if it was not requested."""
        return self.result.get(key, -1)

    def iteritems(self):
        """Iterate over ``(stat_name, value)`` pairs."""
        return self.result.iteritems()

    def dump(self, stream):
        """Print every requested statistic, then the histogram and a separator line."""
        for n in self.options.stat_names:
            print >>stream, "{:>20}: {:<20}".format(n, print_float("{:.2f}", self.result[n]))
        self.hist.dump(stream)
        print >> stream, "----------"

    @staticmethod
    def _range_avg(sorted_values, begin_share, end_share):
        """Average of the values between two relative positions (both ends inclusive)."""
        begin = int(begin_share*len(sorted_values))
        end = int(end_share*len(sorted_values))
        return 1.0*sum(sorted_values[begin:end+1]) / (end - begin + 1)

    @staticmethod
    def calc_stat(name, sorted_values):
        """Compute the statistic ``name`` over ascending-sorted ``sorted_values``.

        Supported names: ``Min``, ``Max``, ``Count``, ``Avg``, ``HiAvg``
        (positions 75%..97%), ``MeanAvg`` (positions 25%..75%) and quantiles
        written as ``Q<percent>`` or ``Quantile_<percent>`` with
        ``0 <= percent <= 100``.

        :return: the value, or -1 for an unknown name or a quantile that
                 cannot be computed
        """
        func_map = {
            "Min": lambda v: v[0],
            "Max": lambda v: v[-1],
            "Count": len,
            "Avg": lambda v: 1.0*sum(v) / len(v),
        }

        if name in func_map:
            return func_map[name](sorted_values)

        range_avgs = {"HiAvg": (0.75, 0.97), "MeanAvg": (0.25, 0.75)}
        if name in range_avgs:
            begin_share, end_share = range_avgs[name]
            try:
                return Stat._range_avg(sorted_values, begin_share, end_share)
            except TypeError:
                # Best effort, as before: values that cannot be summed yield -1.
                return -1

        count = len(sorted_values)
        # The longer prefix goes first so "Quantile_95" is not parsed as "Q" + "uantile_95".
        for quantile_prefix in ("Quantile_", "Q"):
            if not name.startswith(quantile_prefix):
                continue
            try:
                # Slice the prefix off; str.lstrip() would strip a character set instead.
                percent = float(name[len(quantile_prefix):])
            except ValueError:
                continue
            if count == 0 or not 0 <= percent <= 100:
                return -1
            # Clamp so that Q100 returns the maximum instead of indexing past the end.
            index = min(int(percent * count/100.0), count - 1)
            return sorted_values[index]

        return -1


class KeyInvStats(RequestCounter):
    """Reports the three timings carried by the "KeyInvTimeInfo" event.

    The values are read from event fields 3, 4 and 5 (the first three fields
    after time, frame id and event name) and converted to ``int``.
    """

    def __init__(self):
        # Name this class in super(): super(RequestCounter, self) started the MRO
        # lookup *after* RequestCounter and therefore skipped its __init__.
        super(KeyInvStats, self).__init__()
        self.self_time = None
        self.self_cpu_time = None
        self.queue_wait_time = None

    def do_handle(self, event):
        """Remember the timings of a "KeyInvTimeInfo" event (a later event overwrites an earlier one)."""
        if event.name == "KeyInvTimeInfo":
            self.self_time = int(event[3])
            self.self_cpu_time = int(event[4])
            self.queue_wait_time = int(event[5])

    def do_get_results(self):
        """Yield each timing that was seen, all with the "microseconds" dimension."""
        if self.self_time is not None:
            yield ("KeyInv-SelfTime", "microseconds"), self.self_time
        if self.self_cpu_time is not None:
            yield ("KeyInv-SelfCpuTime", "microseconds"), self.self_cpu_time
        if self.queue_wait_time is not None:
            yield ("KeyInv-QueueWaitTime", "microseconds"), self.queue_wait_time


# Counter classes to instantiate for each value of the "mode" option.
# StatCalc.__init__ creates one instance of every class listed for the
# selected mode; a mode missing from this mapping raises KeyError there.
COUNTERS = {
    "middle": [
        RequestTime, MainTaskTime, CPUUsage, MetaFeatures,
        UnanswersPercent, CalcPrepareFactorRequests, CalcMetaRelevance, CalcMetaFeatures, MetaRank,
        RearrangeAfterMerge, RearrangeBeforeFetch, RearrangeAfterFetch, RearrangeAfterMetaFeatures, RearrangeAfterMetaRank,
        CacheLoad, CacheLoadV2, PrepareClients,
        PrintReport, Merge,
        AfterMergeRearrangeOverhead, BeforeFetchRearrangeOverhead, AfterFetchRearrangeOverhead,
        AfterMetaFeaturesRearrangeOverhead, AfterMetaRankRearrangeOverhead,
        ExtraMergeCycles, SubSourceTime, SubSourceError, ErrorMessage,
        KeyInvStats
    ],
    "blender": [
        RequestTime, MainTaskTime, CPUUsage, UnanswersPercent, PrepareClients,
        PrintReport, Merge, ExtraMergeCycles, SubSourceTime,
    ],
    # Same set as "middle" except: YdoRequestTime replaces RequestTime, and
    # CacheLoadV2, ErrorMessage and KeyInvStats are not included.
    "ydo": [
        YdoRequestTime, MainTaskTime, CPUUsage, MetaFeatures,
        UnanswersPercent, CalcPrepareFactorRequests, CalcMetaRelevance, CalcMetaFeatures, MetaRank,
        RearrangeAfterMerge, RearrangeBeforeFetch, RearrangeAfterFetch, RearrangeAfterMetaFeatures, RearrangeAfterMetaRank,
        CacheLoad, PrepareClients,
        PrintReport, Merge,
        AfterMergeRearrangeOverhead, BeforeFetchRearrangeOverhead, AfterFetchRearrangeOverhead,
        AfterMetaFeaturesRearrangeOverhead, AfterMetaRankRearrangeOverhead,
        ExtraMergeCycles, SubSourceTime, SubSourceError,
    ],
}


class StatCalc(object):
    """Runs the configured counters over an event log and aggregates their values.

    ``stat`` maps a counter name to its ``Stat`` once ``parse_eventlog`` has run.
    """

    def __init__(self, options):
        """Instantiate the counters for ``options.mode``.

        :param options: settings object; reads ``mode``, ``rearr``,
            ``rearr_adjusters_stat`` and, in blender mode, ``by_grouping`` and
            ``old_stages_order``
        :raises KeyError: if ``options.mode`` is not a key of ``COUNTERS``
        """
        self.stat = {}
        self.options = options
        self.counters = [c() for c in COUNTERS[options.mode]]

        # "ydo" takes the same optional counters as "middle".  The original code
        # had `options.mode == "middle"` here: a comparison with no effect where
        # an assignment was intended.  A local is used so that the caller's
        # options object is not modified.
        mode = "middle" if options.mode == "ydo" else options.mode
        if mode == "middle":
            if options.rearr:
                self.counters.append(Rearranges(mode))
            if options.rearr_adjusters_stat:
                self.counters.append(RearrangeAdjusterCounter(mode))
        elif mode == "blender":
            self.counters.append(FullDeltaCounter(options.by_grouping, options.old_stages_order))  # SEARCH-4142

    def parse_eventlog(self, stream):
        """Read the event log from ``stream`` and fill ``self.stat``.

        Frames are processed by ``calc_frame`` in a pool of
        ``options.process_count`` worker processes; the per-frame value lists
        are merged per key and turned into ``Stat`` objects.
        """
        full_result = collections.defaultdict(list)
        pool = multiprocessing.Pool(processes=self.options.process_count)
        try:
            for r in pool.imap_unordered(calc_frame, iter_log(stream, self.counters)):
                for key, value in r.items():
                    full_result[key].extend(value)
        finally:
            pool.terminate()

        for key, values in full_result.iteritems():
            try:
                name, dimension = key
            except (TypeError, ValueError):
                # The key is not a (name, dimension) pair: use it as the name.
                name = key
                dimension = '?'

            assert name not in self.stat, 'stat already present'

            self.stat[name] = Stat(name, dimension, values, self.options)

    def dump(self, ostream, sort_func):
        """Print every stat to ``ostream``, ordered by ``sort_func`` applied to ``(name, Stat)`` pairs."""
        for stat_name, stat_val in sorted(self.stat.iteritems(), key=sort_func):
            print >> ostream, stat_name, "({})".format(stat_val.dimension)
            stat_val.dump(ostream)

    def get_stat_value(self, full_name):
        """Return the value for ``"<counter>.<stat>"`` rounded to two decimals, or -1 if the counter is unknown."""
        value_name, sep, stat_name = full_name.rpartition(".")
        if value_name in self.stat:
            return float(print_float("{:.2f}", self.stat[value_name][stat_name]))
        else:
            return -1

    def get_full_stat(self):
        """Return a flat dict ``{"<counter>.<stat>": value}`` over all counters."""
        res = {}
        for value_name, stat_map in self.stat.iteritems():
            for stat_name, value in stat_map.iteritems():
                full_name = "{}.{}".format(value_name, stat_name)
                res[full_name] = value

        return res

    def get_dimensions(self):
        """Return a dict mapping every counter name to its dimension label."""
        res = {}
        for value_name, stat in self.stat.iteritems():
            res[value_name] = stat.dimension
        return res


def calc_frame(frame_and_counters):
    """Run every counter over one frame and collect the values it reports.

    :param frame_and_counters: tuple ``(frame_id, frame, counters)`` where
        ``frame`` is a list of per-event field lists
    :return: dict mapping each result key to the list of its non-None values
    :raises Exception: any failure is logged with its traceback and re-raised
    """
    try:
        frame_id, frame, counters = frame_and_counters
        collected = {}
        frame_events = [Event(raw_fields) for raw_fields in frame]
        shared_ctx = ReqCtx()

        for counter in counters:
            # Every counter of the frame sees the same context object.
            counter.req_ctx = shared_ctx
            counter.frame_id = frame_id
            counter.on_frame_start()

            for frame_event in frame_events:
                counter.handle(frame_event)

            for result in counter.get_results():
                if not result:
                    continue
                key, value = result
                if value is None:
                    continue
                if key not in collected:
                    collected[key] = []
                collected[key].append(value)
        return collected
    except Exception as e:
        logging.exception(e)
        raise


def iter_log(stream, counters):
    """Group tab-separated log lines into frames and yield each completed frame.

    Lines with fewer than three fields (time, frame_id, event_name) are
    skipped.  An "EndOfFrame" line yields ``(frame_id, events, counters)``
    for the events collected so far under that frame id and forgets them;
    the "EndOfFrame" line itself is not part of the events.  Frames that never
    receive "EndOfFrame" are not yielded.
    """
    pending = collections.defaultdict(list)
    for raw_line in stream:
        fields = raw_line.rstrip("\n").split("\t")
        if len(fields) >= 3:  # time, frame_id, event_name
            frame_id, event_name = fields[1], fields[2]
            if event_name != 'EndOfFrame':
                pending[frame_id].append(fields)
            else:
                yield frame_id, pending.pop(frame_id, []), counters


class Options:
    """Plain settings holder read by StatCalc, Stat and Histogram."""

    def __init__(self):
        # Add the Rearranges counter (middle mode).
        self.rearr = False
        # Size of the multiprocessing pool used by StatCalc.parse_eventlog.
        self.process_count = 4
        # Key into COUNTERS.
        self.mode = "middle"
        # Names of the statistics computed by Stat.
        self.stat_names = ["Count"]
        # Histogram overrides: {counter_name: (step, upper_bound)}.
        self.hist = {}
        # Passed to FullDeltaCounter in blender mode.
        self.old_stages_order = False
        # Add the RearrangeAdjusterCounter counter (middle mode).
        self.rearr_adjusters_stat = False
        # Passed to FullDeltaCounter in blender mode.  It had no default before,
        # so StatCalc raised AttributeError unless the caller set it by hand.
        self.by_grouping = False


class Timer(object):
    def __enter__(self):
        self.start = time.clock()
        return self

    def __exit__(self, *args):
        self.end = time.clock()
        self.interval = self.end - self.start
        print "Time:", self.interval


def sort_result_by(stat_name):
    """Build a sort key for ``(name, stats)`` pairs.

    With a non-empty ``stat_name`` the key is the negated value of that
    statistic (largest first); otherwise the key is the name itself.
    """
    def by_stat_descending(item):
        return -item[1][stat_name]

    def by_name(item):
        return item[0]

    return by_stat_descending if stat_name else by_name


def main():
    """Command-line entry point.

    Parses the options, reads the event log from the file given as the first
    positional argument (or stdin when it is missing or "-"), and prints the
    computed statistics to stdout.  Invalid ``--mode`` or ``--hist`` values
    end the program with a usage error.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        "", "--processes", action="store", type="int", dest="process_count",
        default=4,
        help="Set number of worker processes",
    )
    parser.add_option(
        "", "--rearr", action="store_true", dest="rearr", default=False,
        help="Count stat about rearranges or not",
    )
    parser.add_option(
        "", "--hist", action="store", type="string", dest="hist", default="",
        help=(
            "Override histogram options, comma separated list of CounterName_Step_UpperBound "
            "(e.g RequestTime_5000_100000)"
        ))
    parser.add_option(
        "", "--stat", action="store", type="string", dest="stat", default="",
        help="Set which values to calc, comma separated list of names (e.g. Count,Min,Q95)"
    )
    # SEARCH-4142:
    # Restricting the choices turns an unknown mode into a usage error instead
    # of a KeyError raised later by StatCalc.
    parser.add_option(
        "", "--mode", action="store", type="choice", choices=sorted(COUNTERS), dest="mode", default="middle",
        help="Mode ({})".format(", ".join(sorted(COUNTERS)))
    )
    parser.add_option(
        "", "--by_grouping", action="store_true", dest="by_grouping", default=False,
        help="Every rearrange for every grouping separately (blender mode only, more slowly)"
    )
    parser.add_option(
        "", "--sort_by", action="store", type="string", dest="sort_by", default="",
        help="Sort results by specified stat name [ex. Avg] (default = sort by name)"
    )
    parser.add_option(
        "", "--rearr_adjusters_stat", action="store_true", dest="rearr_adjusters_stat", default=False,
        help="whether to calc detailed stat within PrepareClients like PrepareRearrangeParams,AdjustClientParams,"
             "AdjustFactorRequests"
    )

    # SEARCH-8183: Temporary option TODO(dima-zakharov): Remove
    parser.add_option(
        "", "--old_stages_order", action="store_true", dest="old_stages_order", default=False,
        help="Use old order of rearrange stages (SEARCH-8183)"
    )

    cmd_opts, args = parser.parse_args()

    options = Options()
    options.rearr = cmd_opts.rearr
    options.process_count = cmd_opts.process_count
    options.mode = cmd_opts.mode
    options.by_grouping = cmd_opts.by_grouping
    options.old_stages_order = cmd_opts.old_stages_order
    options.rearr_adjusters_stat = cmd_opts.rearr_adjusters_stat
    if cmd_opts.stat:
        options.stat_names = cmd_opts.stat.split(",")

    if cmd_opts.hist:
        for hist_opt in cmd_opts.hist.split(","):
            try:
                # Split from the right so underscores inside the counter name survive.
                counter_name, step, upper_bound = hist_opt.rsplit("_", 2)
                hist_params = (int(step), int(upper_bound))
            except ValueError:
                parser.error(
                    "Invalid --hist item {!r}, expected CounterName_Step_UpperBound".format(hist_opt)
                )
            if hist_params[0] <= 0:
                # Fail before the log is parsed rather than while building the histogram.
                parser.error("Invalid --hist item {!r}, step must be positive".format(hist_opt))
            options.hist[counter_name] = hist_params

    use_stdin = not args or args[0] == "-"
    stream = sys.stdin if use_stdin else open(args[0], "r")
    try:
        with Timer():
            calc = StatCalc(options)
            calc.parse_eventlog(stream)
            calc.dump(sys.stdout, sort_func=sort_result_by(cmd_opts.sort_by))
    finally:
        # Close only what was opened here; stdin is left alone.
        if not use_stdin:
            stream.close()


# Run the command-line tool only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
