#!/usr/bin/env python

from datetime import datetime,timedelta
import time,urlparse
import threading
from sys import stdin,stdout,stderr,exit,argv
import traceback
import logging
#from collections import Counter
import json

class Counter(object):
    """Minimal tally of hashable values.

    Counts are kept in the public ``storage`` dict, which maps each value to
    the number of times it has been recorded.
    """

    def __init__(self, *arr):
        """Create the tally and record every positional argument once."""
        self.storage = {}
        for value in arr:
            self.append(value)

    def update(self, arr):
        """Record every element obtained by iterating over ``arr``."""
        for value in arr:
            self.append(value)

    def append(self, arg):
        """Record one more occurrence of ``arg``."""
        self.storage[arg] = self.storage.get(arg, 0) + 1

    def clear(self):
        """Drop all counts by binding ``storage`` to a fresh, empty dict."""
        self.storage = {}

    def items(self):
        """Return the ``(value, count)`` pairs currently held in ``storage``."""
        return self.storage.items()

def split_uri(uri):
    """Split a URI into its path and query-string parameters.

    Returns a dict that always contains ``"path"``. Every ``key=value`` pair
    from the query string is stored as ``key -> value`` (a string), and every
    bare ``key`` without ``=`` is stored as ``key -> True``. Values are not
    URL-decoded. A query parameter literally named ``path`` overwrites the
    path entry, because both live in the same dict.
    """
    if '?' not in uri:
        return {"path": uri}

    # Split on the first '?' only: a literal '?' inside the query string
    # must stay part of the query instead of breaking the two-way unpack.
    path, params_str = uri.split('?', 1)
    params = {"path": path}
    for chunk in params_str.split('&'):
        if '=' in chunk:
            # Split on the first '=' only so values that themselves contain
            # '=' (e.g. base64 padding) are kept intact.
            key, value = chunk.split('=', 1)
            params[key] = value
        else:
            params[chunk] = True
    return params

class Stats(object):
    """Per-second accumulator of grouped statistics.

    ``storage`` maps ``timestamp -> group name -> field name -> accumulator``.
    The accumulator is a ``Counter`` for fields declared as ``'counter'`` and
    a plain list for fields declared as ``'list'``. Timestamps that have been
    popped are remembered in ``old_seconds`` so late records are rejected.
    """

    def __init__(self, groups, fields):
        """Set up empty storage.

        groups -- iterable of field names to group by (may be falsy); names
                  that are not present in ``fields`` are dropped.
        fields -- mapping of field name to accumulator type, either
                  ``'counter'`` or ``'list'``.
        """
        self.storage = {}
        self.groups = []
        if groups:
            self.groups = [g for g in groups if g in fields]
        self.fields = fields
        self.old_seconds = set()

    def update_fields(self, target, params):
        """Append every value in ``params`` to its accumulator in ``target``.

        Accumulators are created on first use. Raises ValueError (from
        ``put_fields``) for a field name that is not declared in ``fields``.
        """
        for f, v in params.items():
            if f not in target:
                self.put_fields(target, f)
            # Both Counter and list expose append(), so no type switch here.
            target[f].append(v)

    def put_fields(self, target, f):
        """Create the empty accumulator for field ``f`` inside ``target``.

        Raises ValueError when ``f`` is not a declared field or when its
        declared type is neither ``'counter'`` nor ``'list'``.
        """
        if f in self.fields:
            f_type = self.fields[f]
        else:
            raise ValueError("Unknown stat field: %s" % f)
        if f_type == 'counter':
            target[f] = Counter()
        elif f_type == 'list':
            target[f] = []
        else:
            raise ValueError('unknown type %s' % f_type)

    def add(self, ts, params):
        """Record one sample for second ``ts``.

        For every configured group, all entries of ``params`` except the
        group's own key are appended to that group's accumulators. The
        caller's ``params`` dict is not modified.

        Raises ValueError if ``ts`` was already dumped (or a field is
        unknown) and KeyError if ``params`` lacks one of the group keys.
        """
        if ts in self.old_seconds:
            raise ValueError("Stats for second %d (%s) was already dumped!" % (ts, ts_to_date(ts)))

        # Validate before touching storage so that a record missing a group
        # key does not leave an empty bucket behind for this second.
        for group in self.groups:
            if group not in params:
                raise KeyError(group)

        second = self.storage.setdefault(ts, {})
        for group in self.groups:
            # Build a per-group copy instead of rebinding ``params``:
            # rebinding made every later group silently lose the keys of the
            # groups processed before it.
            others = dict((k, v) for k, v in params.items() if k != group)
            self.update_fields(second.setdefault(group, {}), others)

    def seconds(self):
        """Return the timestamps that currently hold data, in ascending order."""
        return sorted(self.storage.keys())

    def group_stat(self, ts, group, src):
        """Flatten one group's accumulators into a single record.

        ``src`` maps field name -> accumulator. Counter fields contribute one
        ``"<field>-<value>"`` entry per counted value; list fields contribute
        the entries produced by ``count_stats``, named ``"<field>-<stat>"``.

        Returns ``{"fields": ..., "name": group, "timestamp": ts}``.
        Raises ValueError for an undeclared accumulator type and KeyError
        (from ``exc_merge``) if two entries end up with the same name.
        """
        fields = {}
        for f, f_type in self.fields.items():
            if f not in src:
                continue
            if f_type == 'counter':
                counters = src[f].items()
            elif f_type == 'list':
                counters = count_stats(src[f]).items()
            else:
                raise ValueError('unknown type %s' % f_type)
            # %-formatting rather than '+' so that counter keys which are
            # not strings (e.g. ints) do not raise TypeError.
            named = dict(('%s-%s' % (f, key), value) for key, value in counters)
            fields = exc_merge(fields, named)
        return {"fields": fields, "name": group, "timestamp": ts}

    def pop_second(self, ts):
        """Remove second ``ts`` from storage and return its statistics.

        Returns a dict mapping group name -> record built by ``group_stat``.
        The timestamp is added to ``old_seconds``. Raises KeyError when
        there is no data stored for ``ts``.
        """
        sec_stat = self.storage[ts]
        result = {}
        for group in sec_stat:
            result[group] = self.group_stat(ts, group, sec_stat[group])
        del self.storage[ts]
        self.old_seconds.add(ts)
        # Look the logger up by name instead of relying on a module global
        # that only exists when the file is executed as a script.
        logging.getLogger('yavs_stat').info(
            "Removed old second %d ( %s ) from stats", ts, ts_to_date(ts))
        return result

def exc_merge(*args):
    """Merge the given dicts into a new dict, refusing to overwrite keys.

    Raises KeyError as soon as a key is found that an earlier argument
    already supplied. The inputs themselves are left untouched.
    """
    merged = {}
    for mapping in args:
        for key in mapping:
            if key in merged:
                raise KeyError("Trying to merge stats on the same key: %s" % key)
            merged[key] = mapping[key]
    return merged

def ts_to_date(ts):
    """Format a Unix timestamp as ``'YYYY-MM-DD HH:MM:SS'``.

    ``ts`` is passed through ``int()`` first, so numeric strings and floats
    are accepted (floats are truncated). The conversion is done with
    ``datetime.fromtimestamp`` without an explicit timezone.
    """
    moment = datetime.fromtimestamp(int(ts))
    return moment.strftime('%Y-%m-%d %H:%M:%S')

def count_stats(l):
    """Summarise a sequence of numbers.

    Returns an empty dict for an empty sequence. Otherwise returns a dict
    with:
      'avg' -- arithmetic mean, as a float
      '90p' -- element at index ``int(len * 0.90)`` of the sorted values
      'max' -- largest value
    """
    count = len(l)
    if count <= 0:
        return {}
    return {
        'avg': float(sum(l)) / count,
        '90p': sorted(l)[int(count * 0.90)],
        'max': max(l),
    }

#[2017-Apr-24 18:59:02.931987] xwFhi60fjiE1: info   ravatt connect=error, check=error, host='127.0.0.1:1345', delay=03.000094, size=55931, status='unknown', msg='Connection timed out', try=1
#[2017-Apr-24 18:59:05.932135] xwFhi60fjiE1: info   ravatt connect=error, check=error, host='127.0.0.1:1345', delay=03.000098, size=55931, status='unknown', msg='Connection timed out', try=2
#[2017-Apr-24 18:59:08.932307] xwFhi60fjiE1: info   ravatt connect=error, check=error, host='127.0.0.1:1345', delay=03.000122, size=55931, status='unknown', msg='Connection timed out', try=3
#[2017-Apr-24 18:59:11.932415] xwFhi60fjiE1: info   ravatt connect=error, check=error, host='127.0.0.1:1345', delay=03.000053, size=55931, status='unknown', msg='Connection timed out', try=4
#
#[2017-Apr-24 18:57:58.798090] rvFtj30f18c1: info   ravatt connect=ok, check=ok, host='127.0.0.1:1345', delay=05.386048, size=33934, status='clean', msg='Success', try=1


def parse_line(line):
    """Parse one "ravatt" log line into a ``(timestamp, params)`` record.

    The line must start with a ``[YYYY-Mon-DD HH:MM:SS.ffffff]`` stamp and
    contain ``"ravatt "`` followed by comma-separated ``name=value`` pairs.

    Returns a one-element tuple ``((unix_seconds, params),)``. ``params``
    always contains ``'yavs': 1`` plus whichever of ``connect``, ``check``,
    ``delay``, ``size``, ``status`` and ``try`` were present; ``delay`` and
    ``size`` are converted to float, the rest stay strings with surrounding
    single quotes removed. Other names (e.g. ``host``, ``msg``) are ignored.

    Raises IndexError when the line has no ``"ravatt "`` marker or no
    timestamp tokens, and ValueError when the timestamp or a numeric value
    cannot be parsed.
    """
    # The timestamp is the first two space-separated tokens of the line.
    fields = line.split(' ')
    time_str = fields[0] + ' ' + fields[1]
    timeformat = '[%Y-%b-%d %H:%M:%S.%f]'
    recieved_ts = time.mktime(datetime.strptime(time_str, timeformat).timetuple())

    # Split on the first marker only, so the same text appearing later
    # (for instance inside msg='...') does not cut the parameter list short.
    params_raw = line.rstrip().split('ravatt ', 1)[1].split(', ')
    param_keys = ('connect', 'check', 'delay', 'size', 'status', 'try')
    numeric_keys = ('delay', 'size')
    params = {'yavs': 1}

    for item in params_raw:
        # partition() tolerates values that contain '=' themselves.
        name, sep, val = item.partition('=')
        if not sep:
            # A fragment without '=' is the tail of a quoted value that
            # contained ', ' (e.g. msg='a, b'); it carries no parameter.
            continue
        name = name.strip()
        if name not in param_keys:
            continue
        val = val.strip("'")
        if name in numeric_keys:
            val = float(val)
        params[name] = val

    return ((int(recieved_ts), params),)

def dump_count(stats, dump_ts):
    """Pop second ``dump_ts`` from ``stats`` and print it as JSON lines.

    Does nothing when ``stats.seconds()`` does not list ``dump_ts``.
    Otherwise each group record returned by ``stats.pop_second`` is written
    to stdout as one JSON document per line, and stdout is flushed once at
    the end.
    """
    if dump_ts not in stats.seconds():
        return
    popped = stats.pop_second(dump_ts)
    for record in popped.values():
        stdout.write(json.dumps(record))
        stdout.write('\n')
    stdout.flush()

class MonitorTread(object):
    """Background thread that dumps accumulated statistics about once a second.

    Creating an instance immediately starts a daemon thread running ``run``.
    """

    def __init__(self, stats, offset=0.9):
        """Start the monitor.

        stats  -- object providing ``seconds()`` and ``pop_second()``; it is
                  handed to ``dump_count`` on every pass.
        offset -- fraction of a second added to the wake-up time, so each
                  pass runs ``offset`` seconds after a whole-second boundary.
        """
        self.stats = stats
        self.offset = offset
        thread = threading.Thread(target=self.run, args=())
        thread.daemon = True                            # Daemonize thread
        thread.start()                                  # Start the execution

    def run(self):
        """Dump one second per pass, forever.

        Each pass dumps the smallest of the stored seconds and the previous
        wall-clock second. When that turns out to be the previous wall-clock
        second and no data is stored for it, ``dump_count`` does nothing.
        The thread then sleeps until ``offset`` seconds past the next
        whole second.
        """
        # Fetch the logger by name rather than using a module global that is
        # only defined when the file runs as a script; otherwise this thread
        # dies with NameError when the class is used from an import.
        log = logging.getLogger('yavs_stat')
        while True:
            now = time.time()
            sched_time = int(now) - 1
            seconds = self.stats.seconds()
            dump_count(self.stats, min(seconds + [sched_time]))

            next_ts = int(now) + 1 + self.offset
            delta = next_ts - now
            log.info("Sleep for: %s", delta)
            time.sleep(delta)

if __name__ == '__main__':

    # With '-i' the script streams: a monitor thread dumps seconds while
    # input is still being read. Without it, everything is dumped at EOF.
    interactive = '-i' in argv

    log = logging.getLogger('yavs_stat')
    log.setLevel(logging.ERROR)
    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s - %(filename)s:%(lineno)d(%(funcName)s) - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    log.addHandler(ch)
    stats = Stats(('yavs',), {'connect':'counter', 'check':'counter', 'delay':'list', 'size':'list', 'status':'counter', 'try':'counter', 'yavs':'counter' })
    #manhole.install(locals={'s':stats})
    if interactive:
        log.setLevel(logging.INFO)
        monitor = MonitorTread(stats)
    try:
        # readline() is called until it returns '' (EOF).
        # NOTE(review): presumably chosen over "for line in stdin" so lines
        # are handled as they arrive -- confirm.
        for line in iter(stdin.readline, ''):
            if 'ravatt' not in line:
                continue
            try:
                parsed = parse_line(line)
                #log.info("Got parsed data %s", parsed)
                for ts, data in parsed:
                    stats.add(ts, data)
            except Exception:
                # Best effort: report the bad line and keep reading.
                # print_exc() takes (limit, file), not the exception object.
                traceback.print_exc()
                log.error("Error parsing on line %s", line)
    except KeyboardInterrupt:
        exit(1)
    if not interactive:
        for t in stats.seconds():
            dump_count(stats, t)
