#!/usr/bin/env python

# -*- coding: utf-8 -*-

import sys, urllib, re, itertools, datetime

class FrameFunctor(object):
    """Base class for per-frame handlers driven by by_frame().

    Every hook has a do-nothing default, so subclasses override only the
    ones they need.
    """

    def on_frame(self, frame):
        """Handle one frame (a list of events); does nothing by default."""
        return None

    def finished(self):
        """Tell the parser whether to stop reading; never by default."""
        return False

    def on_finish(self):
        """Hook called once after parsing ends; does nothing by default."""
        return None

# eventlog parser, for each frame in input, calls functor.on_frame
def by_frame(input, functor):
    """Group eventlog lines into frames and feed them to ``functor``.

    Each line is tab-separated: time, frame id, event name and optional
    event-specific fields.  Lines with fewer than three fields are skipped.
    Events are accumulated per frame id; when an ``EndOfFrame`` event arrives
    the whole frame (a list of field lists, in input order) is passed to
    ``functor.on_frame`` and forgotten.

    Parsing stops as soon as ``functor.finished()`` returns true.  In that
    case unterminated frames are dropped, because the functor has already
    said it wants no more data.  Otherwise frames that never saw
    ``EndOfFrame`` are flushed in ascending frame-id order once the input is
    exhausted.  ``functor.on_finish()`` is always called last.

    Args:
        input: iterable of text lines.
        functor: object providing ``on_frame``, ``finished``, ``on_finish``.

    Raises:
        ValueError: if the frame id field of a line is not an integer.
    """
    frames = {}
    stopped = False
    for line in input:
        fields = line.rstrip("\n").split("\t")
        if len(fields) < 3:  # need at least time, frame_id, event_name
            continue

        frame_id = int(fields[1])
        frame = frames.setdefault(frame_id, [])
        frame.append(fields)

        if fields[2] == 'EndOfFrame':
            functor.on_frame(frame)
            del frames[frame_id]

        if functor.finished():
            stopped = True
            break

    if not stopped:
        # sorted() keeps the flush order deterministic
        for frame_id in sorted(frames):
            functor.on_frame(frames[frame_id])

    functor.on_finish()

def print_frame(frame):
    """Write every event of ``frame`` to stdout as one tab-joined line."""
    for fields in frame:
        print("\t".join(fields))

def frame_times(frame):
    """Return the ``(start, finish)`` timestamps of the request in ``frame``.

    Events are scanned in order up to and including the first
    ``ReportPrinted``.  ``start`` is the time of the last ``EnqueueYSRequest``
    seen in that span, or of the first event when there is none.  ``finish``
    is the time of the ``ReportPrinted`` event, or of the last event when the
    frame has no ``ReportPrinted``.  Both are ``None`` for an empty frame.
    """
    begin = end = None
    for fields in frame:
        end = int(fields[0])
        kind = fields[2]
        if begin is None or kind == "EnqueueYSRequest":
            begin = end
        if kind == "ReportPrinted":
            break
    return begin, end

def frame_time(frame):
    """Return the request duration of ``frame``: finish minus start time."""
    bounds = frame_times(frame)
    return bounds[1] - bounds[0]

# Compiled once at import time.  NOTE: the pattern needs an "&" on both sides
# of the parameter, so a user_request that is the first or the last parameter
# of the string is not matched.
_USER_REQUEST_RE = re.compile(".*&user_request=([^&]*)&.*")


def get_user_request(full_req):
    """Return the raw ``user_request`` CGI value found in ``full_req``.

    The value is returned exactly as it appears (no URL-unquoting).  An empty
    string is returned when the parameter cannot be located.
    """
    m = _USER_REQUEST_RE.match(full_req)
    return m.group(1) if m else ""

# Compiled once at import time.  Accepts the "http" and "http2" schemes and
# requires a "/" after the host part.
_HOSTNAME_RE = re.compile("http2?://([^/]+)/.*")


def get_hostname(full_req):
    """Return the host part (everything up to the first "/") of a URL.

    An empty string is returned when ``full_req`` does not look like
    ``http://host/...`` or ``http2://host/...``.
    """
    m = _HOSTNAME_RE.match(full_req)
    return m.group(1) if m else ""

def fmt_ts(ts):
    """Format a microsecond timestamp as local time ``YYYY-MM-DDTHH:MM:SS``."""
    seconds = ts / 1e6
    moment = datetime.datetime.fromtimestamp(seconds)
    return moment.strftime('%Y-%m-%dT%H:%M:%S')

class Event(object):
    """Parsed eventlog record: the common header plus the raw extra fields."""

    def __init__(self, ev):
        """Split ``ev`` (a list of string fields) into header and tail.

        The first three fields are the timestamp, the frame id and the event
        name; whatever follows is kept untouched in ``_tail`` for subclasses
        to decode.
        """
        self.ts, self.frame_id = int(ev[0]), int(ev[1])
        self.name = ev[2]
        self._tail = ev[3:]

class SubSourceEvent(Event):
    """Common base of the SubSource* events.

    Subclasses fill the three identifying fields from the event tail.
    """

    # Identity fields; None until a subclass parses them.
    client_num = None
    attempt = None
    task_num = None

    def subreq_id(self):
        """Return the key of one attempt: (client_num, attempt, task_num)."""
        return (self.client_num, self.attempt, self.task_num)

    def subreq_sid(self):
        """Return the attempt-independent key: (client_num, task_num)."""
        return (self.client_num, self.task_num)

    def __str__(self):
        kind = type(self).__name__
        return "{} {}".format(kind, self.subreq_id())


class SubSourceError(SubSourceEvent):
    """Fields of a ``SubSourceError`` eventlog record.

    Tail layout (after time, frame id and name): client_num, attempt,
    life_time, status_code, status_msg, cont_name, num_cl, group, task_num,
    aliveness, packet_loss, error_text.
    """

    def __init__(self, ev):
        super(SubSourceError, self).__init__(ev)
        tail = self._tail
        self.client_num, self.attempt = int(tail[0]), int(tail[1])
        self.life_time, self.status_code = int(tail[2]), int(tail[3])
        self.status_msg, self.cont_name = tail[4], tail[5]
        self.num_cl = int(tail[6])
        self.group = tail[7]
        self.task_num = int(tail[8])
        self.aliveness = float(tail[9])
        self.packet_loss = int(tail[10])
        self.error_text = tail[11]


class SubSourceOk(SubSourceEvent):
    """Fields of a ``SubSourceOk`` eventlog record.

    Tail layout (after time, frame id and name): client_num, attempt,
    life_time, wait_in_queue, status_code, status_msg, task_num,
    response_size, packet_loss.
    """

    def __init__(self, ev):
        super(SubSourceOk, self).__init__(ev)
        tail = self._tail
        self.client_num, self.attempt = int(tail[0]), int(tail[1])
        self.life_time, self.wait_in_queue = int(tail[2]), int(tail[3])
        self.status_code = int(tail[4])
        self.status_msg = tail[5]
        self.task_num = int(tail[6])
        self.response_size = int(tail[7])
        self.packet_loss = int(tail[8])


class SubSourceInit(SubSourceEvent):
    """Fields of a ``SubSourceInit`` eventlog record.

    Tail layout (after time, frame id and name): client_num, attempt,
    task_num, client_name.
    """

    def __init__(self, ev):
        super(SubSourceInit, self).__init__(ev)
        tail = self._tail
        self.client_num, self.attempt = int(tail[0]), int(tail[1])
        self.task_num = int(tail[2])
        self.client_name = tail[3]


class SubSourceRequest(SubSourceEvent):
    """Fields of a ``SubSourceRequest`` eventlog record.

    Tail layout (after time, frame id and name): client_num, attempt,
    ip_addr, port, url, task_num.
    """

    def __init__(self, ev):
        super(SubSourceRequest, self).__init__(ev)
        tail = self._tail
        self.client_num, self.attempt = int(tail[0]), int(tail[1])
        self.ip_addr = tail[2]
        self.port = int(tail[3])
        self.url = tail[4]
        self.task_num = int(tail[5])


class SubSourceSkip(SubSourceEvent):
    """Fields of a ``SubSourceSkip`` eventlog record.

    Tail layout (after time, frame id and name): client_num, attempt,
    task_num, status.
    """

    def __init__(self, ev):
        super(SubSourceSkip, self).__init__(ev)
        tail = self._tail
        self.client_num, self.attempt = int(tail[0]), int(tail[1])
        self.task_num = int(tail[2])
        self.status = tail[3]


class PrintN(FrameFunctor):
    """Print first N requests"""
    def __init__(self):
        # number of frames still to print; N comes from the command line
        self.count = int(sys.argv[2])

    def on_frame(self, frame):
        """Print the frame and use up one unit of the remaining budget."""
        print_frame(frame)
        self.count -= 1

    def finished(self):
        """Return True once the budget is used up.

        ``<=`` rather than ``==`` so that a zero or negative N, or an extra
        ``on_frame`` call after the budget ran out, still reads as finished.
        """
        return self.count <= 0


class RunTasks(FrameFunctor):
    """Print RunTasks info"""
    def on_frame(self, frame):
        """Print "<task name> <duration> <count>" per finished RunTasks.

        The task name comes from the latest ``MainTaskStarted`` event, the
        count from the "RunTasks started" debug message.
        """
        started_at = None
        task_count = -1
        task_name = "unknown"
        for fields in frame:
            kind = fields[2]
            if kind == "MainTaskStarted":
                task_name = fields[4]
                continue
            if kind != "DebugMessage":
                continue
            message = fields[3]
            if message == "RunTasks started":
                started_at = int(fields[0])
                task_count = int(fields[4])
            elif message == "RunTasks finished" and started_at:
                elapsed = int(fields[0]) - started_at
                print("{0} {1} {2}".format(task_name, elapsed, task_count))


class MarkedRequests(FrameFunctor):
    """Filter requests urls"""
    def on_frame(self, frame):
        """Print the frame's request, marked when it was really searched.

        The request is taken from the last ``ContextCreated``/``FastCacheHit``
        event seen before the first ``SearchedResultStat``; if such a stat
        event exists, "&pron=nocachehit" is appended.
        """
        url = None
        searched = False
        for fields in frame:
            kind = fields[2]
            if kind in ("ContextCreated", "FastCacheHit"):
                url = fields[3]
            elif kind == "SearchedResultStat":
                searched = True
                break

        if not url:
            return
        print((url + "&pron=nocachehit") if searched else url)


class TsCalc(FrameFunctor):
    """Print stat about tscalc metasearch interaction"""
    def on_frame(self, frame):
        """Print "<requests> <successes> <fails> <time to first success>".

        Nothing is printed when a further ``TsCalcSuccess`` arrives after a
        non-zero time has already been recorded.
        """
        first_request = None
        requests = successes = failures = 0
        elapsed = 0
        for fields in frame:
            kind = fields[2]
            if kind == "TsCalcAsyncRequest":
                requests += 1
                if first_request is None:
                    first_request = int(fields[0])
            elif kind == "TsCalcSuccess" and first_request:
                successes += 1
                if elapsed != 0:
                    return
                elapsed = int(fields[0]) - first_request
            elif kind == "TsCalcFail":
                failures += 1

        print("{0} {1} {2} {3}".format(requests, successes, failures, elapsed))


class Delta(FrameFunctor):
    """Print time between two events"""
    def __init__(self):
        # names of the opening and the closing event, from the command line
        self.Event1, self.Event2 = sys.argv[2], sys.argv[3]

    def on_frame(self, frame):
        """Print the delay from the latest Event1 to the next Event2."""
        opened_at = None
        for fields in frame:
            kind = fields[2]
            if kind == self.Event1:
                opened_at = int(fields[0])
            elif kind == self.Event2 and opened_at:
                print(int(fields[0]) - opened_at)
                opened_at = None


class SubSourceTimes(FrameFunctor):
    """Report subsource response time"""
    def __init__(self):
        # name of the event whose offsets are collected inside a main task
        self.event = "SubSourceRequest"

    def on_frame(self, frame):
        """Print one line per main task.

        The line holds the task name, the tab-joined offsets of every
        ``self.event`` from the task start, and the task duration.
        """
        task_start = None
        for fields in frame:
            kind = fields[2]
            if kind == "MainTaskStarted":
                task_start = int(fields[0])
                task_name = fields[4]
                offsets = []
            elif kind == "MainTaskFinished" and task_start:
                duration = int(fields[0]) - task_start
                print("{0} {1} {2}".format(task_name, "\t".join(offsets), duration))
                task_start = None
            elif kind == self.event and task_start:
                offsets.append(str(int(fields[0]) - task_start))


class CgiParams(FrameFunctor):
    """Filter cgi param from requests"""
    def __init__(self):
        # parameter name from the command line; it is inserted into the
        # regexp below as is (not escaped)
        self.p = sys.argv[2]

    def on_frame(self, frame):
        """Print all values of the parameter in the frame's request.

        The request is the payload of the first ``ContextCreated`` or
        ``FastCacheHit`` event; values are joined with spaces.
        """
        url = None
        for fields in frame:
            if fields[2] in ("ContextCreated", "FastCacheHit"):
                url = fields[3]
                break

        if not url:
            return
        values = re.findall("[?&]" + self.p + "=([^&]+)", url)
        print(" ".join(values))


class SelfTime(FrameFunctor):
    """Print self time (full time except waiting for replies) or frames where self time slower than threshold"""
    def __init__(self):
        args = sys.argv
        # 0 means "no threshold": print the self time of every frame
        self.threshold = int(args[2]) if len(args) > 2 else 0

    def on_frame(self, frame):
        """Report the frame time not spent inside main tasks.

        Without a threshold prints "<frame id> <self time>"; with one prints
        the whole frame when the self time exceeds it.
        """
        waited = 0
        task_started = None
        for fields in frame:
            kind = fields[2]
            if kind == "MainTaskStarted":
                task_started = int(fields[0])
            elif kind == "MainTaskFinished" and task_started:
                waited += int(fields[0]) - task_started

        self_time = frame_time(frame) - waited
        if not self.threshold:
            print("{0} {1}".format(frame[0][1], self_time))
        elif self_time > self.threshold:
            print_frame(frame)


class WWW(FrameFunctor):
    """Filter events with ReportMessage type www"""
    def on_frame(self, frame):
        """Print the frame if it has a ReportMessage with type == www."""
        is_www = any(
            fields[2] == "ReportMessage" and fields[4] == "type" and fields[5] == "www"
            for fields in frame
        )
        if is_www:
            print_frame(frame)


class SubReqsTypes(FrameFunctor):
    """Print request types stats by subsource hosts"""
    def __init__(self):
        # host -> {request type -> number of SubSourceRequest events}
        self.by_host_stat = {}

    def on_frame(self, frame):
        """Count the frame's sub-requests per host and request type.

        The request type follows the latest ``MainTaskStarted``: "fetch" and
        "reask" tasks keep their name, any other task counts as "search";
        requests seen before any task are counted as "none".
        """
        req_type = "none"
        for event in frame:
            if event[2] == "MainTaskStarted":
                task = event[4]
                req_type = task if task in ("fetch", "reask") else "search"
            elif event[2] == "SubSourceRequest":
                host = get_hostname(event[7])
                if not host:
                    # URL does not look like http://host/...; nothing to
                    # attribute the request to
                    continue
                per_type = self.by_host_stat.setdefault(host, {})
                per_type[req_type] = per_type.get(req_type, 0) + 1

    def on_finish(self):
        """Print "<host> <search> <fetch> <reask>" for every host seen."""
        for host, stat in self.by_host_stat.items():
            print("{0} {1} {2} {3}".format(
                host, stat.get("search", 0), stat.get("fetch", 0), stat.get("reask", 0)))


class SubReqs(FrameFunctor):
    """Print subsource requests"""

    class sr(object):
        """One sub-request attempt: the request event plus its outcome.

        ``ok``, ``error`` and ``skip`` hold the matching SubSourceOk /
        SubSourceError / SubSourceSkip event, or None if none was seen.
        """
        def __init__(self, request, ok=None, error=None, skip=None):
            self.request = request
            self.ok = ok
            self.error = error
            self.skip = skip

        def __str__(self):
            # four placeholders for four fields, so skip is shown as well
            return "{} {} {} {}".format(self.request, self.ok, self.error, self.skip)

    def on_frame(self, frame):
        """Collect the frame's sub-requests and hand them to ``analyze``.

        Builds ``inits`` ((client_num, task_num) -> SubSourceInit) and
        ``subreqs`` ((client_num, attempt, task_num) -> sr).  Outcome events
        with no matching SubSourceRequest are ignored.
        """
        subreqs = {}
        inits = {}

        for event in frame:
            if event[2] == "SubSourceInit":
                ev = SubSourceInit(event)
                inits[ev.subreq_sid()] = ev
            elif event[2] == "SubSourceRequest":
                ev = SubSourceRequest(event)
                subreqs[ev.subreq_id()] = self.sr(ev)
            elif event[2] == "SubSourceError":
                ev = SubSourceError(event)
                if ev.subreq_id() in subreqs:
                    subreqs[ev.subreq_id()].error = ev
            elif event[2] == "SubSourceOk":
                ev = SubSourceOk(event)
                if ev.subreq_id() in subreqs:
                    subreqs[ev.subreq_id()].ok = ev
            elif event[2] == "SubSourceSkip":
                ev = SubSourceSkip(event)
                if ev.subreq_id() in subreqs:
                    subreqs[ev.subreq_id()].skip = ev

        self.analyze(inits, subreqs)

    def analyze(self, inits, subreqs):
        """Log one line per sub-request that has an outcome.

        The outcome is picked in the order ok, error, skip; requests with
        none of them are not logged.  A missing SubSourceInit yields the
        client name "UNKNOWN" and the client number -1.
        """
        for req in subreqs.itervalues():
            sid = req.request.subreq_sid()
            client_name = inits[sid].client_name if sid in inits else "UNKNOWN"
            client_num = inits[sid].client_num if sid in inits else -1
            start_time = req.request.ts
            host = get_hostname(req.request.url)
            error_text = ""

            if req.ok:
                status, res = "OK", req.ok
                status_code = res.status_code
                status_msg = res.status_msg
            elif req.error:
                status, res = "ERROR", req.error
                status_code = res.status_code
                status_msg = res.status_msg
                error_text = res.error_text
            elif req.skip:
                status, res = "SKIP", req.skip
                status_code = -1
                status_msg = res.status
            else:
                continue

            date = fmt_ts(res.ts)
            self.log(
                date,
                res.ts,
                res.frame_id,
                status,
                host,
                client_num,
                client_name,
                res.attempt,
                res.ts-start_time,
                status_code,
                status_msg,
                error_text,
            )

    def log(self, *args):
        """Print the arguments as one tab-separated line."""
        print("\t".join(str(w) for w in args))

    def latency(self, sreq):
        """Return the time from request to its ok/error event, else 0."""
        if not sreq.request:
            return 0
        if sreq.ok:
            return sreq.ok.ts - sreq.request.ts
        elif sreq.error:
            return sreq.error.ts - sreq.request.ts
        else:
            return 0


class SubReqsFails(SubReqs):
    """Print failed subsource requests"""
    def analyze(self, inits, subreqs):
        """Log every (client, task) pair none of whose attempts succeeded.

        The line holds the time, frame id, client number/name and task number
        of the pair, the number of attempts (highest failed attempt + 1) and
        the sorted list of (attempt, status_code, status_msg, error_text,
        latency) tuples of its errors.

        The header fields come from the SubSourceInit event.  If the frame
        has none for this pair, the earliest request is used instead and the
        client name is reported as "UNKNOWN".
        """
        attempts = {}
        for req in subreqs.itervalues():
            attempts.setdefault(req.request.subreq_sid(), []).append(req)

        for sid, reqs in attempts.iteritems():
            if any(req.ok for req in reqs):
                continue

            max_attempt = 0
            errs = []
            for req in reqs:
                err = req.error
                if not err:
                    continue
                errs.append((err.attempt, err.status_code, err.status_msg, err.error_text, self.latency(req)))
                max_attempt = max(max_attempt, err.attempt)

            init = inits.get(sid)
            if init is not None:
                ts, frame_id = init.ts, init.frame_id
                client_num, task_num = init.client_num, init.task_num
                client_name = init.client_name
            else:
                first = min(reqs, key=lambda r: r.request.ts).request
                ts, frame_id = first.ts, first.frame_id
                client_num, task_num = sid
                client_name = "UNKNOWN"

            self.log(
                fmt_ts(ts),
                ts,
                frame_id,
                client_num,
                client_name,
                task_num,
                max_attempt+1,
                sorted(errs),
            )


class ClientRequests(FrameFunctor):
    """Print search requests to clients with ServerDescr == arg """
    def __init__(self):
        # client description to look for, from the command line
        self.descr = sys.argv[2]

    def on_frame(self, frame):
        """Print the first matching sub-request URL without "dh=" in it.

        A sub-request matches when the SubSourceInit event of its client
        carried the wanted description.
        """
        descr_by_client = {}
        for fields in frame:
            kind = fields[2]
            if kind == "SubSourceInit":
                descr_by_client[fields[3]] = fields[6]
            elif kind == "SubSourceRequest":
                client = fields[3]
                if client not in descr_by_client:
                    continue
                if descr_by_client[client] != self.descr:
                    continue
                url = fields[7]
                if "dh=" not in url:
                    print(url)
                    return


class CachedFastHit(FrameFunctor):
    """Filter requests served from fast cache"""
    def on_frame(self, frame):
        """Print event name, user request and full request of the frame.

        Only the first ``ContextCreated`` or ``FastCacheHit`` event is used.
        """
        for fields in frame:
            kind = fields[2]
            if kind not in ("ContextCreated", "FastCacheHit"):
                continue
            full_req = fields[3]
            print("{0} {1} {2}".format(kind, get_user_request(full_req), full_req))
            break


class Metarank(FrameFunctor):
    """Filter metarank requests"""
    def on_frame(self, frame):
        """Print the frame if it contains a ``StartMetaRank`` event."""
        if any(fields[2] == "StartMetaRank" for fields in frame):
            print_frame(frame)


class NoMetarank(FrameFunctor):
    """Filter no metarank requests"""
    def on_frame(self, frame):
        """Print the frame unless it contains a ``StartMetaRank`` event."""
        if any(fields[2] == "StartMetaRank" for fields in frame):
            return
        print_frame(frame)


class Filter(FrameFunctor):
    """Generic request filter"""
    def __init__(self):
        pattern = sys.argv[2]
        # a leading "~" inverts the filter; every leading "~" is stripped
        self.reverted = (pattern[0] == '~')
        self.value = pattern.lstrip('~')

    def on_frame(self, frame):
        """Print the frame when its request passes the filter.

        The request is the payload of the first ``ContextCreated`` or
        ``FastCacheHit`` event.  It passes when it contains the value, or,
        for an inverted filter, when it does not.  Frames without such an
        event are never printed.
        """
        for fields in frame:
            if fields[2] in ("ContextCreated", "FastCacheHit"):
                has_value = self.value in fields[3]
                if has_value != self.reverted:
                    print_frame(frame)
                return


class FilterStages(FrameFunctor):
    """Filter requests with specified stage pattern"""
    def on_frame(self, frame):
        """Print frames whose ``WaitFor`` stages match the command line.

        The stage names of the frame and the command-line arguments are each
        concatenated without a separator and compared as strings.  Frames
        without an ``EnqueueYSRequest`` event are never printed.
        """
        enqueued = False
        stages = []
        for fields in frame:
            kind = fields[2]
            if kind == "WaitFor":
                stages.append(fields[3])
            elif kind == "EnqueueYSRequest":
                enqueued = True

        if not enqueued:
            return
        wanted = "".join(sys.argv[2:])
        if "".join(stages) == wanted:
            print_frame(frame)


class PrintSlow(FrameFunctor):
    """Filter slow requests"""
    def on_frame(self, frame):
        """Print the frame if it took longer than the command-line limit."""
        began, ended = frame_times(frame)
        duration = ended - began
        limit = int(sys.argv[2])
        if duration > limit:
            print_frame(frame)


class NotCached(FrameFunctor):
    """Prints notcached requests"""
    def on_frame(self, frame):
        """Print the frame if a request precedes its SearchedResultStat.

        Only the first ``SearchedResultStat`` event is considered; the
        request is the payload of a preceding ``ContextCreated`` event.
        """
        context = None
        for fields in frame:
            kind = fields[2]
            if kind == "ContextCreated":
                context = fields[3]
                continue
            if kind != "SearchedResultStat":
                continue
            if context:
                print_frame(frame)
            return


class Cached(FrameFunctor):
    """Prints cached requests"""
    def on_frame(self, frame):
        """Print the frame if a request precedes its CachedResultStat.

        Only the first ``CachedResultStat`` event is considered; the request
        is the payload of a preceding ``ContextCreated`` event.
        """
        context = None
        for fields in frame:
            kind = fields[2]
            if kind == "ContextCreated":
                context = fields[3]
                continue
            if kind != "CachedResultStat":
                continue
            if context:
                print_frame(frame)
            return


class Stages(FrameFunctor):
    """Print request stages"""
    def on_frame(self, frame):
        """Print "<duration> <stage> <stage> ..." for the frame.

        The duration runs from the last ``EnqueueYSRequest`` (or the first
        event) to the first ``ReportPrinted`` (or the last event).  Stages
        are the payloads of the ``WaitFor`` events seen up to that point.
        """
        began = ended = None
        waited_for = []
        for fields in frame:
            ended = int(fields[0])
            kind = fields[2]
            if began is None or kind == "EnqueueYSRequest":
                began = ended
            if kind == "WaitFor":
                waited_for.append(fields[3])
            elif kind == "ReportPrinted":
                break

        print("{0} {1}".format(ended - began, " ".join(waited_for)))




class Markers(FrameFunctor):
    """Print some stat about slow (>200ms) middlesearch requests """

    # stages whose run count and total time are tracked per frame
    _STAGES = ("search", "fetch", "allfactors", "merge", "aux", "reask",
               "nextpage", "nexttier", "report", "rerun", "prepare", "rearrange")

    def __init__(self):
        self.start_time = None
        self.finish_time = None
        self.requests = 0 #all requests
        # marker name -> number of frames that got the marker
        self.markers = {}
        # slowness threshold; timestamps are in microseconds, so this is 200ms
        self.threshold = 200000

        # marker names, grouped the way on_finish prints them
        self.names = [["total", "requests", "threshold", "waitsame"], ["hascache", "cacheonly", "nexttier", "invalidate", "nextpages", "manynextp", "rerun"], ["slow", "longsearch", "longfetch", "longfactors", "s2f", "c1f", "c2f"], ["metarank", "baserank", "manygroups", "debug"] ]

        # stage -> candidate per-run time bounds in milliseconds; marker
        # "<stage><bound>" means: had every run of the stage taken only
        # <bound> ms, the frame would have fit into the threshold
        self.speedup = {"fetch": [50, 40, 30], "search": [250, 220, 200, 150, 100], "allfactors": [50, 40, 30], "merge": [10, 5, 1] , "report": [10, 5, 1], "rerun": [], "prepare": [10, 20, 30], "rearrange": [5, 10, 20] }

        # marker "+<v>" counts frames that exceed the threshold by less
        # than <v> milliseconds
        self.overrun = [5, 10, 20, 50]

        self.names.append([
            stage + str(bound)
            for stage in self.speedup
            for bound in sorted(self.speedup[stage])
        ])
        self.names.append(["+" + str(v) for v in self.overrun])

        for name in itertools.chain(*self.names):
            self.markers[name] = 0

    def on_frame(self, frame):
        """Classify one frame and update the marker counters.

        Every frame counts as "requests"; frames slower than the threshold
        count as "threshold".  A slow frame that would fit into the threshold
        without its WaitFinishSameReqs time only counts as "waitsame".  All
        other slow frames count as "total" and get the detailed markers.
        """
        start, finish = frame_times(frame)
        if not self.start_time or self.start_time > start:
            self.start_time = start

        if not self.finish_time or self.finish_time < finish:
            self.finish_time = finish

        self.markers["requests"] += 1

        total_time = finish - start
        if total_time <= self.threshold:
            return

        self.markers["threshold"] += 1

        for event in frame:
            if event[2] == "WaitFinishSameReqs" and total_time - int(event[3]) < self.threshold:
                self.markers["waitsame"] += 1
                return

        local_markers = {}
        stage_times = dict.fromkeys(self._STAGES, 0)
        stage_count = dict.fromkeys(self._STAGES, 0)

        start_task = None
        current_task = None
        start_report = None
        start_rearrange = None
        start_merge = 0
        start_prepare = 0

        metarank = False
        group_count = 0
        cache_only = True

        for event in frame:
            name = event[2]
            if name == "ReportPrinted":
                if start_report:
                    stage_count["report"] += 1
                    stage_times["report"] += int(event[0]) - start_report
            elif name in ("StartRearrangeAfterMerge", "StartRearrangeAfterFetch"):
                start_rearrange = int(event[0])
            elif name in ("StopRearrangeAfterMerge", "StopRearrangeAfterFetch"):
                # both stop events need a preceding start; the original
                # condition only checked it for the AfterFetch variant
                if start_rearrange:
                    stage_count["rearrange"] += 1
                    stage_times["rearrange"] += int(event[0]) - start_rearrange
            elif name == "InvalidateCache":
                local_markers["invalidate"] = True
            elif name in ("FastCacheHit", "CachedResultStat"):
                local_markers["hascache"] = True
            elif name == "ReRunAgainSearch":
                local_markers["rerun"] = True
            elif name == "MainTaskStarted":
                current_task = event[4]

                if current_task == "nexttier":
                    local_markers["nexttier"] = True

                if current_task != "aux":
                    cache_only = False

                start_task = int(event[0])
                if current_task == "nextpage" and "hascache" in local_markers:
                    local_markers["nextpages"] = True
            elif name == "MainTaskFinished":
                # skip a finish with no start; tolerate task names that are
                # not in _STAGES instead of failing with KeyError
                if start_task is not None:
                    stage_count[current_task] = stage_count.get(current_task, 0) + 1
                    stage_times[current_task] = (
                        stage_times.get(current_task, 0) + int(event[0]) - start_task)
            elif name == "StartMetaRank":
                metarank = True
            elif name == "StartMergeGrouping":
                start_merge = int(event[0])
            elif name == "StopMergeGrouping":
                # without a start the raw timestamp would be added as a time
                if start_merge:
                    stage_count["merge"] += 1
                    stage_times["merge"] += int(event[0]) - start_merge
            elif name == "DebugMessage":
                if event[3].find("slow request") != -1:
                    local_markers["slow"] = True
            elif name == "StartPrepareClients":
                start_prepare = int(event[0])
            elif name == "StopPrepareClients" and start_prepare:
                stage_count["prepare"] += 1
                stage_times["prepare"] += int(event[0]) - start_prepare
            elif name == "ContextCreated":
                request = event[3]
                # third dot-separated component of the "g" parameter
                m = re.match(r".*&g=[^\.]*\.[^\.]*\.(\d+)\..*", request)
                if m:
                    group_count = int(m.group(1))

                for v in ["dbgrlv", "timeout=", "fsgta="]:
                    if request.find(v) != -1:
                        local_markers["debug"] = True
                        break
            elif name == "PageRetrieved":
                start_report = int(event[0])

        if not start or not finish:
            return

        if cache_only:
            self.markers["cacheonly"] += 1

        if group_count > 50:
            self.markers["manygroups"] += 1

        if metarank:
            self.markers["metarank"] += 1
        else:
            self.markers["baserank"] += 1

        if stage_times["search"] > 200000:
            local_markers["longsearch"] = True

        if stage_times["allfactors"] > 50000:
            local_markers["longfactors"] = True

        if stage_times["fetch"] > 50000:
            local_markers["longfetch"] = True

        if stage_count["fetch"] == 1 and stage_count["search"] == 0:
            local_markers["c1f"] = True

        if stage_count["fetch"] > 1:
            if stage_count["search"] > 0:
                local_markers["s2f"] = True
            else:
                local_markers["c2f"] = True

        if stage_count["nextpage"] > 1:
            local_markers["manynextp"] = True

        # total_time > self.threshold is guaranteed by the early return above
        for stage in self.speedup:
            for bound in self.speedup[stage]:
                # bound is in ms per run, times are in microseconds
                sum_bound = bound * stage_count[stage] * 1000
                if sum_bound < stage_times[stage] and total_time - stage_times[stage] + sum_bound < self.threshold:
                    local_markers[stage + str(bound)] = True

        for v in self.overrun:
            if total_time < self.threshold + v*1000:
                self.markers["+" + str(v)] += 1

        self.markers["total"] += 1
        for key in local_markers:
            self.markers[key] += 1

    def on_finish(self):
        """Print the covered time range and all counters.

        "threshold" is a percentage of all requests, "plain" and "waitsame"
        of the slow requests, and every other marker of "total".
        """
        total = self.markers["total"]
        if total == 0:
            print("no requests")
            return

        first = datetime.datetime.fromtimestamp(self.start_time/1000000)
        last = datetime.datetime.fromtimestamp(self.finish_time/1000000)
        print(str(first) + " --- " + str(last))
        fmt = "{0:>20}: {1:<5} {2:.3}"
        print(fmt.format("requests", self.markers["requests"], ""))
        print(fmt.format("threshold", self.markers["threshold"], 100.0 * self.markers["threshold"] / self.markers["requests"]))
        print(fmt.format("plain", self.markers["total"], 100.0 * total / self.markers["threshold"]))
        print(fmt.format("waitsame", self.markers["waitsame"], 100.0 * self.markers["waitsame"]  / self.markers["threshold"]))

        for group in self.names[1:]:
            print("----------------------------")
            for name in group:
                print(fmt.format(name, self.markers[name], 100.0*self.markers[name] / total))


class ReqTime(FrameFunctor):
    """Print req time from EnqueueYsRequest to ReportPrinted or last event, unanswers and request type"""
    def on_frame(self, frame):
        """Print one tab-separated summary line for the frame.

        Columns: formatted start time, start timestamp, frame id, duration,
        unanswer ratio, "unanswered/asked", request type, cache state, CPU
        time.  Nothing is printed without an ``EnqueueYSRequest`` event.
        """
        enqueued_at = None
        end_time = None
        frame_id = None
        unansw_ratio = 0
        end_frozen = False
        cache_state = 'cache-miss'
        cpu_time = '0'
        unansw_stats = "?/?"
        for fields in frame:
            # the end time follows every event until ReportPrinted freezes it
            if not end_frozen:
                end_time = int(fields[0])
            kind = fields[2]
            if kind == "EnqueueYSRequest":
                enqueued_at = end_time
                req_type = fields[4]
                frame_id = fields[1]
            elif kind == "EnqueueYSRequestFail":
                req_type = 'FAIL'
            elif kind == "ReportPrinted":
                end_frozen = True
            elif kind == "RequestStat" and fields[7] == "0_TIER":
                asked, unanswered = fields[3], fields[4]
                if int(asked) == 0:
                    continue
                unansw_ratio = 1.0 * int(unanswered) / int(asked)
                unansw_stats = "%s/%s" % (unanswered, asked)
            elif kind == 'CachedResultStat':
                cache_state = 'cache-hit'
            elif kind == 'CPUUsage':
                cpu_time = fields[4]

        if not (enqueued_at and end_time):
            return
        columns = [
            fmt_ts(enqueued_at),
            str(enqueued_at),
            frame_id,
            str(end_time - enqueued_at),
            "%.6f" % unansw_ratio,
            unansw_stats,
            req_type,
            cache_state,
            cpu_time,
        ]
        print('\t'.join(columns))


class SearchTime(FrameFunctor):
    """Print search stage timings and unanswers, or frames with search time more than threshold"""
    def __init__(self):
        # optional threshold from the command line; None means "print stats"
        self.threshold = int(sys.argv[2]) if len(sys.argv) > 2 else None

    def on_frame(self, frame):
        """Measure the first search stage of the frame.

        The stage runs from a ``WaitFor search`` event to the next
        ``MainTaskFinished``.  Without a threshold prints
        "<frame id> <search time> <unanswer ratio>"; with one prints the
        whole frame when the search time exceeds it.  Frames whose 0_TIER
        ``RequestStat`` reports zero requests are skipped.
        """
        start = None
        # initialised here so that a search stage with no MainTaskFinished
        # is skipped instead of raising UnboundLocalError
        finish = None
        have_search = False
        unansw = 0
        for event in frame:
            if event[2] == "WaitFor" and not have_search:
                if event[3] == "search":
                    start = int(event[0])
                else:
                    start = None

            if event[2] == "RequestStat" and event[7] == "0_TIER":
                if int(event[3]) == 0:
                    return
                unansw = 1.0 * int(event[4]) / int(event[3])

            if event[2] == "MainTaskFinished" and start and not have_search:
                finish = int(event[0])
                have_search = True

        if start and finish:
            t = finish - start
            if self.threshold:
                if t > self.threshold:
                    print_frame(frame)
            else:
                # frame id as written in the last event of the frame
                print("{0} {1} {2}".format(frame[-1][1], t, unansw))


class FetchTime(FrameFunctor):
    """Print Start-Stop snippet stage timings, or frames with FetchStage more than threshold"""
    def __init__(self):
        args = sys.argv
        # optional threshold from the command line; None means "print stats"
        self.threshold = int(args[2]) if len(args) > 2 else None

    def on_frame(self, frame):
        """Report every StartFetch..StopFetch span of the frame.

        Without a threshold prints "<frame id> <duration> <requests> <oks>
        <stage number>" per span; with one prints the whole frame for each
        span longer than the threshold.
        """
        opened_at = None
        stage_no = 0
        for fields in frame:
            kind = fields[2]
            if kind == "StartFetch":
                opened_at = int(fields[0])
                requests = 0
                oks = 0
                stage_no += 1
            elif not opened_at:
                continue
            elif kind == "SubSourceRequest":
                requests += 1
            elif kind == "SubSourceOk":
                oks += 1
            elif kind == "StopFetch":
                duration = int(fields[0]) - opened_at
                if not self.threshold:
                    print("{0} {1} {2} {3} {4}".format(fields[1], duration, requests, oks, stage_no))
                elif duration > self.threshold:
                    print_frame(frame)
                opened_at = None


class AllfactorTime(FrameFunctor):
    """Print Start-Stop allfactors stage timings, or frames with allfactors stage more then threshold"""
    def __init__(self):
        args = sys.argv
        # optional threshold from the command line; None means "print stats"
        self.threshold = int(args[2]) if len(args) > 2 else None

    def on_frame(self, frame):
        """Report every allfactors main task of the frame.

        A span opens at ``MainTaskStarted`` with the name "allfactors" and
        closes at the next ``MainTaskFinished``.  Without a threshold prints
        "<frame id> <duration> <requests> <oks> <stage number>" per span;
        with one prints the whole frame for each span longer than it.
        """
        opened_at = None
        stage_no = 0
        for fields in frame:
            kind = fields[2]
            if kind == "MainTaskStarted" and fields[4] == "allfactors":
                opened_at = int(fields[0])
                requests = 0
                oks = 0
                stage_no += 1
            elif not opened_at:
                continue
            elif kind == "SubSourceRequest":
                requests += 1
            elif kind == "SubSourceOk":
                oks += 1
            elif kind == "MainTaskFinished":
                duration = int(fields[0]) - opened_at
                if not self.threshold:
                    print("{0} {1} {2} {3} {4}".format(fields[1], duration, requests, oks, stage_no))
                elif duration > self.threshold:
                    print_frame(frame)
                opened_at = None


class Yandsearch(FrameFunctor):
    """Filter yandsearch requests"""
    def on_frame(self, frame):
        """Print the frame if it was enqueued for "/yandsearch"."""
        wanted = any(
            fields[2] == "EnqueueYSRequest" and fields[6] == "/yandsearch"
            for fields in frame
        )
        if wanted:
            print_frame(frame)


class Diversity(FrameFunctor):
    """Filter diversity requests"""
    def on_frame(self, frame):
        """Print the frame if it was enqueued for "/diversity"."""
        wanted = any(
            fields[2] == "EnqueueYSRequest" and fields[6] == "/diversity"
            for fields in frame
        )
        if wanted:
            print_frame(frame)


class AllReqs(FrameFunctor):
    """Print requests to all collections"""
    def on_frame(self, frame):
        """Print "<collection>?<request>" for the frame.

        The collection comes from a preceding ``EnqueueYSRequest`` event
        ("/" without "?" if there was none), the request from the first
        ``ContextCreated`` or ``FastCacheHit`` event.
        """
        prefix = "/"
        for fields in frame:
            kind = fields[2]
            if kind == "EnqueueYSRequest":
                prefix = fields[6] + "?"
            elif kind in ("ContextCreated", "FastCacheHit"):
                print(prefix + fields[3])
                return


class Print(FrameFunctor):
    """Prints eventlog in format [time str, timestamp, time from start, delta time, frame id, event]"""
    def on_frame(self, frame):
        if not frame or (len(sys.argv) > 2 and sys.argv[2] != frame[0][1]):
            return

        prev_time = -1;
        start_time = -1;
        for event in frame:
            current_time = int(event[0])

            if prev_time < 0: prev_time = current_time
            if start_time < 0: start_time = current_time
            print "%s\t%d\t%d\t%d\t%s" % (fmt_ts(current_time), current_time, current_time - start_time, current_time - prev_time, "\t".join(event[1:]))
            prev_time = current_time


class GroupCount(FrameFunctor):
    """Prints asked group counts or frames with asked groups more than threshold"""
    def __init__(self):
        self.threshold = int(sys.argv[2]) if len(sys.argv) > 2 else None
    def on_frame(self, frame):
        for event in frame:
            if event[2] == "ContextCreated":
                request = event[3]
                m = re.match(".*&g=[^\.]*\.[^\.]*\.(\d+)\..*", request)
                if m and self.threshold and int(m.group(1)) <= self.threshold:
                    print_frame(frame)
                if m:
                    print event[1], m.group(1)
                return


class UnanswerStat(FrameFunctor):
    """Prints user requests and unanswer stat"""
    def on_frame(self, frame):
        stat = [-1, -1]
        request = ""
        frame_id = 0
        for event in frame:
            if event[2] == "ContextCreated":
                request = event[3]
                m = re.match(".*&user_request=([^&]*)&.*", request)
                if m:
                    request = urllib.unquote(m.group(1))
                frame_id = event[1]
                req_id = event[4]

            if event[2] == "RequestStat":
                total = int(event[3])
                unansw = int(event[4])
                if event[7] == "0_TIER" and total:
                    stat[0] = 1.0*unansw / total
                if event[7] == "1_TIER" and total:
                    stat[1] = 1.0*unansw / total

        if request:
            print frame_id, req_id, request, "\t".join([str(x) for x in stat])


class FetchStat(FrameFunctor):
    """Fetch stage and fetch req timings"""
    def on_frame(self, frame):
        requests = {}
        tasks = {}
        start_fetch = None
        last_request = ""
        for event in frame:
            if event[2] == "StartFetch":
                start_fetch = int(event[0])
            elif event[2] == "StopFetch":
                if start_fetch:
                    print "fetch", len(tasks),  event[1], int(event[0]) - start_fetch, ":", last_request
                start_fetch = None
            elif event[2] == "MainTaskStarted":
                requests = {}
                tasks = {}
                last_request = ""
            elif event[2] == "SubSourceRequest" and event[7].find("&DF=da") != -1:
                key = "{0}-{1}-{2}".format(event[3], event[4], event[8])
                m = re.match("http2?://([^/]+)/.*", event[7])
                requests[key] = {"request": event[7], "start": int(event[0]), "host": m.group(1)}
                tasks["{0}-{1}".format(event[3], event[8])] = 1

            elif event[2] == "SubSourceOk":
                key = "{0}-{1}-{2}".format(event[3], event[4], event[9])
                if key in requests:
                    del tasks["{0}-{1}".format(event[3], event[9])]
                    req_start = requests[key]["start"]
                    request = requests[key]["request"]
                    spm = 0
                    diqm = 0
                    for param in request.split("&"):
                        l = param.split("=")
                        if l[0] == "spm":
                            spm = l[1]
                        elif l[0] == "diqm":
                            diqm = l[1]

                    last_request = "\t".join([str(x) for x in [requests[key]["host"], int(event[3]), int(event[0]) - req_start, int(event[5]),  spm, diqm]])

                    print "request", last_request


class CacheTime(FrameFunctor):
    """Print getting results from cache time"""
    def on_frame(self, frame):
        start = None
        for event in frame:
            if event[2] == "TryLoadFromCache":
                start = int(event[0])
            if event[2] == "CachedResultStat":
                if start:
                    print '\t'.join([fmt_ts(start), str(start), event[1], str(int(event[0]) - start)])


class CacheHit(FrameFunctor):
    """Print cache hit"""
    def __init__(self):
        self.total = 0
        self.cached = 0
        self.cache_fast = 0
        self.cache_found = 0
        self.cache_snippet = 0
        self.cache_only = 0

        self.reqid_regexp = None
        if len(sys.argv) > 2:
            self.reqid_regexp = re.compile(sys.argv[2])
        self.n = 0
        if len(sys.argv) > 3:
            self.n = int(sys.argv[3])

    def on_frame(self, frame):
        cached = False
        has_search = False
        has_fetch = False
        cache_fast = False
        for event in frame:
            if event[2] == "EnqueueYSRequest":
                if event[4] != 'SEARCH' or event[6] != '/yandsearch':
                    return
            elif event[2] == "CachedResultStat":
                cached = True
            elif event[2] == "FastCacheHit":
                cached = True
                cache_fast = True
                break
            elif event[2] == 'SearchRequestCount':
                if event[3] != '0':
                    has_search = True
            elif event[2] == 'FetchRequestCount':
                if event[3] != '0':
                    has_fetch = True
            elif self.reqid_regexp and event[2] == 'ContextCreated':
                reqid = event[4]
                if not self.reqid_regexp.match(reqid):
                    return

        if cached:
            self.cached += 1
            if has_search:
                self.cache_found += 1
            elif has_fetch:
                self.cache_snippet += 1
            else:
                self.cache_only += 1
            if cache_fast:
                self.cache_fast += 1
        self.total += 1

    def on_finish(self):
        if self.total:
            print "Total: {0} cached: {1:.2f} cache_found: {2:.2f} cache_snippet: {3:.2f} cache_only: {4:.2f} cache_fast: {5:.2f}".format(self.total, 100.*self.cached / self.total, 100.*self.cache_found / self.total, 100.*self.cache_snippet / self.total, 100.*self.cache_only / self.total, 100.*self.cache_fast / self.total)

    def finished(self):
        return self.n and self.total >= self.n

class SubReqsLength(FrameFunctor):
    """Print subrequest length"""
    def on_frame(self, frame):
        for event in frame:
            if event[2] == "SubSourceRequest":
                print len(event[7])


def all_subclasses(classes):
    """Return ``classes`` together with all their direct and indirect subclasses.

    The result is a list in depth-first pre-order: every class is immediately
    followed by the subtree of its first subclass, then of its second one, and
    so on.  A class reachable through several bases appears once per path.
    """
    found = []
    # Explicit stack; items are pushed reversed so they pop in original order.
    pending = list(classes)[::-1]
    while pending:
        cls = pending.pop()
        found.append(cls)
        pending.extend(cls.__subclasses__()[::-1])
    return found

def main():
    functor = None
    if len(sys.argv) < 2:
        functor = Print()
    else:
        func_name = sys.argv[1]
        if func_name in globals():
            functorClass = globals()[sys.argv[1]]
            if issubclass(functorClass, FrameFunctor):
                functor = functorClass()

    if functor:
        by_frame(sys.stdin, functor)
    else:
        print "Usage: evlogdump ... | evlogview FunctorName args"
        print "Available functors: "
        funcs = []
        for func in all_subclasses([FrameFunctor]):
            funcs.append("{0:<20}: {1}".format(func.__name__, func.__doc__))

        for func in sorted(funcs):
            print func

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
