#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import os
import re
import sys
from pprint import pprint
from collections import defaultdict


# Prefix put in front of every metric name printed by this script.
s_name = 'event-loader'

# Counters reported as "<s_name>_error_<name>".
results_errors = {'parse': 0}

# Counters for ERROR-level log messages, reported as
# "<s_name>_count_log_error_<name>".
results_log_errors = dict.fromkeys(('index', 'transaction', 'parse'), 0)

# Per-host HTTP request statistics. Host keys are host names with '.'
# replaced by '_'. Every host gets its own counter dict and its own
# timings list, so updating one host never touches another.
_tracked_hosts = (
    'index_disk_yandex_net',
    'ratelimiter_disk_yandex_net',
    'disk-producer_search_yandex_net',
    'others',
)
_request_outcomes = ('success', 'error', 'unknown')

results_host_status = {
    tracked_host: dict.fromkeys(_request_outcomes, 0)
    for tracked_host in _tracked_hosts
}
results_host_timings = {tracked_host: [] for tracked_host in _tracked_hosts}

# Event time lag values collected from the log.
results_lag_timings = []

# Miscellaneous counters, reported as "<s_name>_stat_<name>".
results_stats = {'events_total': 0}
#
# Patterns applied to the text of INFO-level log messages.
# Raw strings are used so that regex escapes such as \d and \. reach the re
# module untouched instead of being treated as (invalid) string escapes.
#
# http_re captures: host name, outcome phrase ("completed successfully" or
# "failed"), and the request duration as a decimal number.
http_re = re.compile(r'^HTTP POST http://([^\/:]+)[\/:][^ ]+ (completed successfully|failed) with code \d+; took (\d+\.\d+)$')
# lag_re captures the integer number of seconds of event time lag.
lag_re = re.compile(r'^Event time lag: (\d+) s$')

# Maps the outcome phrase captured by http_re to the counter it increments.
_status_by_phrase = {
    'completed successfully': 'success',
    'failed': 'error',
}

# Read log records from stdin and accumulate counters and timings.
# Each non-blank line is a tab-separated list of key=value fields; only the
# 'level' and 'message' fields are used.
for line in sys.stdin:
    if line.strip() == '':
        continue

    parsed = {}
    for part in line.strip("\n").split("\t"):
        key, separator, value = part.partition('=')
        if not separator:
            # A field without '=' carries no key/value pair; skip it rather
            # than storing a truncated key.
            continue
        parsed[key] = value

    level = parsed.get('level')
    message = parsed.get('message')
    if level is None or message is None:
        # A record without the required fields cannot be classified. Count
        # it as a parse error and keep going instead of aborting the run.
        results_errors['parse'] += 1
        continue

    if level == 'ERROR':
        if message.startswith("Got error from listener"):
            results_log_errors['transaction'] += 1
        elif message.startswith("Event is not saved to index"):
            results_log_errors['index'] += 1
        elif message.startswith("Error while parsing event of type"):
            results_log_errors['parse'] += 1

    elif level == 'INFO':
        lag_match = lag_re.match(message)
        if lag_match:
            # Every lag message counts as one processed event.
            results_lag_timings.append(lag_match.group(1))
            results_stats['events_total'] += 1
            continue

        http_match = http_re.match(message)
        if not http_match:
            continue

        b_host, b_status, b_time = http_match.groups()

        u_host = b_host.replace('.', '_')
        if u_host not in results_host_status:
            # Hosts without a dedicated bucket are aggregated under 'others'
            # instead of raising KeyError.
            u_host = 'others'

        u_status = _status_by_phrase.get(b_status, 'unknown')

        results_host_status[u_host][u_status] += 1
        # Timings stay as strings; they are joined verbatim when printed.
        results_host_timings[u_host].append(b_time)

    
# Request counters: one line per (host, outcome) pair, in sorted order.
for host_key in sorted(results_host_status):
    outcome_counts = results_host_status[host_key]
    for outcome in sorted(outcome_counts):
        print("%s_count_request_%s_code_%s %d" % (s_name, host_key, outcome, outcome_counts[outcome]))


# Request timings: one line per host, skipped for hosts with no samples.
for host_key in sorted(results_host_timings):
    samples = results_host_timings[host_key]
    if samples:
        print("%s_timings_request_%s %s" % (s_name, host_key, ' '.join(samples) ))

# Counters for ERROR-level log messages.
for error_name in sorted(results_log_errors):
    print("%s_count_log_error_%s %d" % (s_name, error_name, results_log_errors[error_name]))

# Counters from results_errors.
for error_name in sorted(results_errors):
    print("%s_error_%s %d" % (s_name, error_name, results_errors[error_name]))

# Lag timings are always printed, even when no samples were collected.
lag_samples = ' '.join(results_lag_timings)
print("%s_timings_lag %s" % (s_name, lag_samples))

# Miscellaneous counters.
for stat_name in sorted(results_stats):
    print("%s_stat_%s %d" % (s_name, stat_name, results_stats[stat_name]))


sys.exit(0)

