#!/usr/bin/env python

import re
import os
import socket
import time
import math
import json

from collections import defaultdict
from datetime import datetime, timedelta
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer


# Naive datetime for 1970-01-01 00:00. LogParser.parse() subtracts
# time.mktime() of this value from the time.mktime() of each log timestamp.
EPOCH = datetime(1970, 1, 1, 0)

# Matches a line that starts with a bracketed field and reports
# "Total time for which application threads were stopped".
# Group 1 is the content of the leading [...] field (parsed as a timestamp by
# LogParser.parse()); group 2 is the stopped time in seconds, as text.
line_re1 = re.compile(r'^\[([^\[\]]+)\].* Total time for which application threads were stopped: (\S+) seconds, Stopping threads took: \S+ seconds$')
# Same layout for "Application time" lines: group 1 is the leading [...] field,
# group 2 the application time in seconds, as text.
# NOTE(review): not referenced anywhere else in the visible part of this file.
line_re2 = re.compile(r'^\[([^\[\]]+)\].* Application time: (\S+) seconds$')

def reversed_fp_iter(fp, buf_size=8192):
    """Yield the lines of an open, seekable file from last to first.

    The file is read backwards in chunks of at most ``buf_size`` units, so
    the whole file is never held in memory.  Lines keep their trailing line
    terminator (``splitlines(True)``), exactly as they appear in the file.

    Works with both text (``str``) and binary (``bytes``) file objects; the
    yielded lines have the same type as ``fp.read()`` returns.  Offsets are
    computed arithmetically from ``fp.tell()`` at end of file, so ``fp`` must
    accept ``seek()`` to arbitrary positions.

    :param fp: open file object supporting ``seek``, ``tell`` and ``read``.
    :param buf_size: maximum amount of data requested per read.
    :returns: generator of lines, last line of the file first.  Nothing is
        yielded for an empty file.
    """
    # First line of the most recently read chunk.  It may be only the tail
    # of a line whose beginning lives in the chunk that precedes it in the
    # file, so it is held back until that chunk has been read.
    segment = None
    offset = 0
    fp.seek(0, os.SEEK_END)
    file_size = remaining_size = fp.tell()
    while remaining_size > 0:
        offset = min(file_size, offset + buf_size)
        fp.seek(file_size - offset)
        chunk = fp.read(min(remaining_size, buf_size))
        remaining_size -= buf_size
        if not chunk:
            # The file shrank after its size was measured (for example it
            # was truncated by log rotation); there is nothing left to read.
            break
        lines = chunk.splitlines(True)
        if segment is not None:
            # Pick the newline literal that matches the type of data read so
            # the check also works for binary files.
            newline = '\n' if isinstance(chunk, str) else b'\n'
            if chunk.endswith(newline):
                # This chunk ends exactly on a line boundary, so the held
                # segment is already a complete line of its own.
                yield segment
            else:
                # The held segment is the tail of this chunk's last line.
                lines[-1] += segment
        segment = lines[0]
        # Everything except the first (possibly partial) line is complete.
        for line in reversed(lines[1:]):
            yield line
    # Don't yield None if the file was empty
    if segment is not None:
        yield segment


class LogParser(object):
    """Collect recent "application threads were stopped" pauses from GC logs.

    Log files are looked up in ``gc_logs_dir`` by file-name prefix and read
    newest-first, each one from its last line backwards, until a matching
    line older than ``backtrack_seconds`` is found.
    """

    def __init__(self, gc_logs_dir, gc_log_prefix="gc.log", backtrack_seconds=60):
        """Store the lookup settings.

        :param gc_logs_dir: directory that contains the GC log files.
        :param gc_log_prefix: file names must start with this prefix.
        :param backtrack_seconds: how far back from "now" pauses are collected.
        """
        self.gc_logs_dir = gc_logs_dir
        self.gc_log_prefix = gc_log_prefix
        self.backtrack_seconds = backtrack_seconds

    def last_gc_files(self):
        """Return ``(path, mtime)`` pairs of the GC log files, newest first.

        Only regular files whose name starts with ``gc_log_prefix`` are
        returned.  An empty list is returned when the directory is missing.
        """
        if not os.path.exists(self.gc_logs_dir):
            return []

        found = []
        for name in os.listdir(self.gc_logs_dir):
            if not name.startswith(self.gc_log_prefix):
                continue
            path = os.path.join(self.gc_logs_dir, name)
            if not os.path.isfile(path):
                continue
            try:
                mtime = os.path.getmtime(path)
            except OSError:
                # The file was removed (e.g. rotated) after it was listed.
                continue
            found.append((path, mtime))

        return sorted(found, key=lambda entry: entry[1], reverse=True)

    def parse(self):
        """Sum the pauses found within the backtrack window.

        :returns: ``{'1sec': d1, '5sec': d5}``.  ``d1`` maps the timestamp of
            a log line to the sum of the pauses logged with that timestamp;
            ``d5`` does the same with timestamps rounded down to a multiple
            of five.  Pauses are integers equal to the logged seconds
            multiplied by 1,000,000 (i.e. microseconds).
        """
        d1_sec = defaultdict(int)
        d5_sec = defaultdict(int)

        now = time.time()

        for path, _mtime in self.last_gc_files():
            try:
                fp = open(path)
            except (IOError, OSError):
                # The file disappeared between listing and opening; the
                # remaining (older) files may still hold relevant lines.
                continue
            with fp:
                if self._accumulate(fp, now, d1_sec, d5_sec):
                    # Files are ordered newest first, so once one file
                    # reaches the cutoff the older ones are not read.
                    break

        return {'1sec': d1_sec, '5sec': d5_sec}

    def _accumulate(self, fp, now, d1_sec, d5_sec):
        """Add the pauses of one log file; return True once the cutoff is hit."""
        # NOTE(review): time.mktime() interprets its argument as local time,
        # so subtracting mktime(EPOCH) shifts the result by the local UTC
        # offset before it is compared with time.time().  This looks correct
        # only when the process runs in UTC - confirm against deployment.
        epoch_offset = time.mktime(EPOCH.timetuple())

        for line in reversed_fp_iter(fp):
            matched = line_re1.match(line.strip())
            if not matched:
                continue

            try:
                # Only the first 23 characters ("YYYY-MM-DDTHH:MM:SS.mmm")
                # are parsed; anything after them is ignored.
                ts = time.strptime(matched.group(1)[:23], '%Y-%m-%dT%H:%M:%S.%f')
                pause = int(float(matched.group(2)) * 1000000)
            except (ValueError, OverflowError):
                # A malformed timestamp or duration must not abort the
                # whole parse; skip just this line.
                continue

            # struct_time carries no sub-second field, so the keys below
            # fall on whole seconds.
            timestamp = time.mktime(ts) - epoch_offset

            if (now - timestamp) > self.backtrack_seconds:
                return True

            d1_sec[timestamp] += pause
            d5_sec[int(timestamp / 5) * 5] += pause

        return False


class Accum(object):
    """Histogram with exponentially growing buckets.

    Bucket ``i`` counts values whose ``floor(log(value) / log(1.5))`` equals
    ``i``; its reported lower bound is ``1.5 ** i`` (built by repeated
    multiplication).  Non-positive values and values that map to a negative
    bucket index are counted separately as "zeros".
    """

    INTERVAL_COEFF = 1.5
    INTERVAL_LOG = math.log(INTERVAL_COEFF)

    def __init__(self, capacity):
        """Create an empty histogram with ``capacity`` buckets."""
        self.zeros = 0
        self.counters = [0] * capacity

    def update(self, value):
        """Count ``value`` in its bucket (or in ``zeros``)."""
        if value <= 0:
            self.zeros += 1
            return

        bucket = int(math.floor(math.log(value) / self.INTERVAL_LOG))
        if bucket < 0:
            self.zeros += 1
            return

        # Values beyond the last bucket are clamped into it.
        last = len(self.counters) - 1
        self.counters[min(bucket, last)] += 1

    def get_counters(self):
        """Return ``[[lower_bound, count], ...]`` without trailing empty buckets.

        The first entry is always ``[0.0, zeros]``.
        """
        used = len(self.counters)
        while used > 0 and self.counters[used - 1] <= 0:
            used -= 1

        pairs = [[0.0, self.zeros]]
        bound = 1.0
        for count in self.counters[:used]:
            pairs.append([bound, count])
            bound *= self.INTERVAL_COEFF
        return pairs


class YasmHandler(BaseHTTPRequestHandler):
    """HTTP handler that answers GET with pause histograms encoded as JSON."""

    # Object providing parse(); assigned on the class by the script entry
    # point before the server starts.
    parser = None

    def _set_headers(self):
        """Send a 200 status line and the JSON content-type header."""
        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.end_headers()

    def do_GET(self):
        """Write one ``[signal_name, histogram]`` entry per parsed interval."""
        self._set_headers()
        signals = []
        for interval, pauses in self.parser.parse().items():
            histogram = Accum(100)
            # Only the summed pause values matter here, not their timestamps.
            for pause in pauses.values():
                histogram.update(pause)
            signal_name = "jvm.gc.stw_pause_%s_dhhh" % interval
            signals.append([signal_name, histogram.get_counters()])
        payload = json.dumps(signals)
        self.wfile.write(payload)

    def do_HEAD(self):
        """Send the same headers as GET, without a body."""
        self._set_headers()


class HTTPServerV6(HTTPServer):
    """HTTPServer variant whose address family is AF_INET6 (IPv6)."""

    # Overrides the address family the base server class uses for its socket.
    address_family = socket.AF_INET6


def run(server_class=HTTPServerV6, handler_class=YasmHandler, port=80):
    """Start an HTTP server on ``port`` and serve requests until interrupted.

    :param server_class: server type, instantiated as
        ``server_class((host, port), handler_class)``.
    :param handler_class: request handler class passed to the server.
    :param port: TCP port to listen on; the host part is the empty string.
    """
    listen_on = ('', port)
    server = server_class(listen_on, handler_class)
    server.serve_forever()


if __name__ == "__main__":
    # Command line: <script> GC_LOGS_DIR PORT
    import sys

    # Every request handler instance reads the parser from the class.
    YasmHandler.parser = LogParser(sys.argv[1])
    run(port=int(sys.argv[2]))
