#!/usr/bin/env python

from datetime import datetime,timedelta
import time,urlparse
import threading
from sys import stdin,stdout,stderr,exit,argv
import traceback
import logging
#from collections import Counter
import json

class Counter(object):
    """Minimal occurrence counter backed by a plain dict.

    Presumably a stand-in for ``collections.Counter`` (whose import is
    commented out in this file) -- TODO confirm.  Counts live in
    ``self.storage`` as ``{item: number_of_times_seen}``.
    """

    def __init__(self, *arr):
        """Create the counter and count every positional argument once."""
        self.storage = {}
        self.update(arr)

    def update(self, arr):
        """Count every element of the iterable ``arr``."""
        for item in arr:
            self.append(item)

    def append(self, arg):
        """Increase the count of ``arg`` by one (starting from zero)."""
        self.storage[arg] = self.storage.get(arg, 0) + 1

    def clear(self):
        """Forget all counts by replacing the storage with a new dict."""
        self.storage = {}

    def items(self):
        """Return the ``(item, count)`` pairs of the underlying dict."""
        return self.storage.items()

def split_uri(uri):
    """Split a request URI into its path and query-string parameters.

    Returns a dict that always contains ``"path"`` (the part before the
    first ``?``).  Every ``key=value`` query item is stored as
    ``params[key] = value``; an item without ``=`` is stored as
    ``params[item] = True``.  Values are kept verbatim (no URL-decoding).
    A query key literally named ``path`` overwrites the path entry.
    """
    if '?' not in uri:
        return {"path": uri}
    # Split on the first '?' only: a second literal '?' inside the query
    # must not break the two-value unpacking.
    path, params_str = uri.split('?', 1)
    params = {"path": path}
    for item in params_str.split('&'):
        if '=' in item:
            # Split on the first '=' only so that values which themselves
            # contain '=' (e.g. base64 padding) are kept intact.
            key, value = item.split('=', 1)
            params[key] = value
        else:
            params[item] = True
    return params

class Stats(object):
    """Per-second, per-group accumulator of request statistics.

    ``storage`` is laid out as ``{ts: {group: {field: accumulator}}}``.
    ``fields`` maps every allowed field name to its accumulator type:
    ``'counter'`` (a ``Counter``) or ``'list'`` (a plain list).
    ``grouped`` is the key of the incoming params dict whose value selects
    the group.  ``old_seconds`` remembers timestamps that were already
    popped so that late additions to them are rejected.
    """

    def __init__(self, grouped, fields):
        self.storage = {}
        self.grouped = grouped
        self.fields = fields
        self.old_seconds = set()

    def update_fields(self, target, params):
        """Append every value of ``params`` to its accumulator in ``target``.

        Missing accumulators are created on demand via ``put_fields``.
        """
        for name in params:
            if name not in target:
                self.put_fields(target, name)
            target[name].append(params[name])

    def put_fields(self, target, f):
        """Create an empty accumulator for field ``f`` inside ``target``.

        Raises ValueError when ``f`` is not a declared field or when its
        declared type is neither ``'counter'`` nor ``'list'``.
        """
        if f not in self.fields:
            raise ValueError("Unknown stat field: %s" % f)
        kind = self.fields[f]
        if kind == 'counter':
            target[f] = Counter()
        elif kind == 'list':
            target[f] = []
        else:
            raise ValueError('unknown type %s' % kind)

    def add(self, ts, params):
        """Record one observation for second ``ts``.

        The value stored under ``self.grouped`` in ``params`` picks the
        group; every other key is accumulated as a field.  Raises
        ValueError if second ``ts`` was already popped.
        """
        if ts in self.old_seconds:
            raise ValueError("Stats for second %d (%s) was already dumped!" % (ts, ts_to_date(ts)))

        group = params[self.grouped]
        values = dict((key, params[key]) for key in params if key != self.grouped)
        # The buckets are created before the fields are validated, exactly
        # as a plain "if missing: create" sequence would do.
        per_second = self.storage.setdefault(ts, {})
        bucket = per_second.setdefault(group, {})
        self.update_fields(bucket, values)

    def seconds(self):
        """Return the stored timestamps in ascending order."""
        return sorted(self.storage.keys())

    def group_stat(self, ts, group, src):
        """Flatten the accumulators of one group into a single record.

        Counter fields contribute their raw counts, list fields contribute
        the summary produced by ``count_stats``.  Each resulting key is
        ``'<field>-<subkey>'`` (so sub-keys must be strings).  Returns
        ``{"fields": ..., "name": group, "timestamp": ts}``.
        """
        fields = {}
        for f, f_type in self.fields.items():
            if f not in src:
                continue
            if f_type == 'counter':
                counters = src[f].items()
            elif f_type == 'list':
                counters = count_stats(src[f]).items()
            else:
                raise ValueError('unknown type %s' % f_type)
            prefixed = dict((f + '-' + pair[0], pair[1]) for pair in counters)
            fields = exc_merge(fields, prefixed)
        return {"fields": fields, "name": group, "timestamp": ts}

    def pop_second(self, ts):
        """Remove second ``ts`` from storage and return its per-group records.

        The timestamp is added to ``old_seconds`` so later ``add`` calls
        for it fail.  Returns ``{group: record}`` with records built by
        ``group_stat``.
        """
        sec_stat = self.storage[ts]
        result = dict(
            (group, self.group_stat(ts, group, sec_stat[group]))
            for group in sec_stat
        )
        # Only drop the second once every group was summarised successfully.
        del self.storage[ts]
        self.old_seconds.add(ts)
        log.info("Removed old second %d ( %s ) from stats" % (ts, ts_to_date(ts)))
        return result

def exc_merge(*args):
    """Merge the given dicts into a new one, refusing duplicate keys.

    Raises KeyError as soon as a key appears in more than one argument.
    """
    merged = {}
    for mapping in args:
        for key in mapping:
            if key in merged:
                raise KeyError("Trying to merge stats on the same key: %s" % key)
            merged[key] = mapping[key]
    return merged

def ts_to_date(ts):
    """Format a unix timestamp as a local-time 'YYYY-MM-DD HH:MM:SS' string.

    ``ts`` is truncated with ``int()`` first, so floats and numeric strings
    are accepted as well.
    """
    moment = datetime.fromtimestamp(int(ts))
    return moment.strftime('%Y-%m-%d %H:%M:%S')

def count_stats(l):
    """Summarise a list of numbers.

    Returns an empty dict for an empty list; otherwise a dict with
    ``'avg'`` (arithmetic mean as float), ``'90p'`` (the element at index
    ``int(len * 0.90)`` of the sorted list) and ``'max'``.
    """
    count = len(l)
    if not count:
        return {}
    return {
        'avg': float(sum(l)) / count,
        '90p': sorted(l)[int(count * 0.90)],
        'max': max(l),
    }


#[2017-02-10 13:38:02.281718] 2a02:6b8:0:f2d:2016:304:1107:1 "POST /put/9002.3001785061.961515850165314516514850691871?_X_Proxy_Client_Address=2a02%3a6b8%3a0%3af2d%3a%3a2f&comp=none&msg&mulca=1&service=yaback&unit_type=ham HTTP/1.0" result 200 "OK" "547" 821 168 "2cOfMl5geKo3" 0.048

def parse_line(line):
    """Parse one access-log line into two per-second stat records.

    The line is split on single spaces and fields are taken by position:
    0-1 timestamp (``[%Y-%m-%d %H:%M:%S.%f]``), 3 request method, 4 URI,
    7 status, 9 quoted message size, -4 / -3 the "in" / "out" sizes and
    -1 the request time.

    Returns a 2-tuple of ``(unix_second, params)`` pairs: the first holds
    ``service``, ``type``, ``time`` and ``status``; the second holds
    ``msg``, ``in``, ``out`` and ``service`` suffixed with ``"-size"``.
    ``service`` comes from the URI query and defaults to ``'none'``.

    Raises ValueError if the timestamp or the request time cannot be
    parsed and IndexError if the line has too few fields.  Unparsable
    size fields are not an error: all three sizes then fall back to 0.
    """
    fields = line.split(' ')
    time_str = fields[0] + ' ' + fields[1]
    timeformat = '[%Y-%m-%d %H:%M:%S.%f]'
    # mktime() interprets the parsed (naive) time as local time.
    received_ts = time.mktime(datetime.strptime(time_str, timeformat).timetuple())
    req_type = fields[3].strip('"')
    params = split_uri(fields[4])
    service = params.get('service', 'none')
    # presumably seconds -> milliseconds -- TODO confirm against log format
    req_time = float(fields[-1]) * 1000
    status = fields[7]
    try:
        raw_size = fields[9].strip('"')
        # presumably bytes -> KiB for all three sizes -- TODO confirm
        msg_size = float(raw_size) / 1024 if raw_size else 0.
        kbin = float(fields[-4]) / 1024
        kbout = float(fields[-3]) / 1024
    except (IndexError, ValueError):
        # Only missing or non-numeric size fields are tolerated here; a
        # bare "except:" would also swallow KeyboardInterrupt/SystemExit.
        msg_size = 0.
        kbin = 0.
        kbout = 0.
    received_unix = int(received_ts)
    return ((received_unix, {'service': service, 'type': req_type, 'time': req_time, 'status': status}),
            (received_unix, {'msg': msg_size, "in": kbin, "out": kbout, 'service': service + "-size"}))

def dump_count(stats, dump_ts):
    """Write the stats of second ``dump_ts`` to stdout, one JSON doc per line.

    Does nothing when ``stats`` holds no data for that second.  Otherwise
    the second is popped from ``stats``, every group record is emitted as
    a JSON line, and stdout is flushed once at the end.
    """
    if dump_ts not in stats.seconds():
        return
    records = stats.pop_second(dump_ts)
    for record in records.values():
        stdout.write(json.dumps(record))
        stdout.write('\n')
    stdout.flush()

class MonitorTread(object):
    """Background dumper that flushes the previous second about once a second.

    Constructing an instance immediately starts a daemon thread running
    ``run``.  ``offset`` is the fraction of a second after the next second
    boundary at which the thread wakes up again.
    """

    def __init__(self, stats, offset=0.9):
        self.stats = stats
        self.offset = offset
        worker = threading.Thread(target=self.run)
        worker.daemon = True    # do not keep the process alive on exit
        worker.start()

    def run(self):
        """Loop forever: dump the previous second, then sleep until the next wake-up."""
        while True:
            now = datetime.now()
            # Current time truncated to whole seconds (timetuple drops microseconds).
            now_ts = time.mktime(now.timetuple())
            dump_count(self.stats, int(now_ts) - 1)

            wake_at = datetime.fromtimestamp(now_ts + 1 + self.offset)
            gap = wake_at - now
            # Manual equivalent of timedelta.total_seconds().
            delta = gap.days * 86400 + gap.seconds + gap.microseconds / 1e6
            log.info("Sleep for: %s", delta)
            time.sleep(delta)

if __name__ == '__main__':
    # Script entry point: read log lines from stdin, aggregate them and
    # emit per-second JSON stats on stdout.
    #
    # With '-i' (interactive mode) a background MonitorTread dumps each
    # second as time passes and INFO logging is enabled; without it
    # everything is dumped once after stdin is exhausted.

    interactive = '-i' in argv
    # 'log' is a module-level name on purpose: Stats and MonitorTread use it.
    log = logging.getLogger('notif_stat')
    log.setLevel(logging.ERROR)
    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)d(%(funcName)s) - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    log.addHandler(ch)
    stats = Stats('service', {'msg': 'list', 'in': 'list', 'out': 'list', 'type': 'counter', 'time': 'list', 'status': 'counter'})
    #manhole.install(locals={'s':stats})
    if interactive:
        log.setLevel(logging.INFO)
        monitor = MonitorTread(stats)
    try:
        # iter(readline, '') yields lines one readline() call at a time
        # until EOF (an empty string).
        for line in iter(stdin.readline, ''):
            try:
                for ts, data in parse_line(line):
                    stats.add(ts, data)
            except Exception:
                # A bad line must not stop the stream: report and continue.
                # print_exc() takes (limit, file), not the exception object.
                traceback.print_exc()
                log.error("Error parsing on line %s", line)
    except KeyboardInterrupt:
        exit(1)
    if not interactive:
        for t in stats.seconds():
            dump_count(stats, t)
