#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import re
import sys
from collections import Counter, defaultdict


def tskv_get_key(line, key):
    """Return the value of field ``key`` from a tab-separated ``key=value`` line.

    The first occurrence of the field wins. The value runs from just after
    ``key=`` up to the next tab (or the end of the line).

    :param line: one log line made of tab-separated ``key=value`` fields.
    :param key: name of the field to extract.
    :return: the field value, or an empty string when the field is absent.
    """
    marker = '%s=' % key
    if line.startswith(marker):
        # The field is the very first one, so it has no leading tab.
        value_start = len(marker)
    else:
        marker_index = line.find('\t' + marker)
        if marker_index < 0:
            # str.find() returns -1 on a miss; slicing with that offset would
            # return an unrelated piece of the line, so report "no value".
            return ''
        value_start = marker_index + 1 + len(marker)

    value_end = line.find('\t', value_start)
    if value_end < 0:
        return line[value_start:]
    return line[value_start:value_end]


def gen_key(*args):
    """Build a metric key by gluing the given string parts together with ``_``."""
    separator = '_'
    return separator.join(args)


# Number of 'r.y.ch.jdbc.q' messages that q_query_re could not parse.
parse_errors = 0

# HTTP request accumulators.
# "<host>_<method>" -> Counter mapping each "took" string to its occurrence count.
results_timings_request = defaultdict(Counter)
# "<host>_<method>_<status>" -> number of requests.
results_count_request_status = Counter()
# "<host>_total_<status>" -> number of requests, all methods together.
results_count_aggr_request_status = Counter()

# Query accumulators.
# "<operation>" -> Counter mapping each "took" string to its occurrence count.
results_timings_query = defaultdict(Counter)
# "<operation>_<status>" -> number of queries.
results_count_query_status = Counter()
# "read" / "write" / "transproc" / "total" -> Counter of "took" strings.
results_timings_aggr_query = defaultdict(Counter)
# "<group>_<status>" -> number of queries, for the same groups as above.
results_count_aggr_query_status = Counter()

# "<host>_total" -> Counter of "took" strings, all methods together.
results_timings_aggr_request = defaultdict(Counter)

# HTTP client message: method, target host, completed/failed status, an
# optional three-digit status code and the duration at the end of the message.
# The decimal point of the duration is escaped so that only a literal '.'
# is accepted between the two digit groups.
http_re = re.compile(
    r'HTTP (?P<r_type>[A-Z]+) https?:\/\/(?P<r_host>[^\/]+)\/.* (?P<r_status>completed|failed)([^;]+?)?'
    r'(?:code (?P<r_status_code>\d{3}))?; took (?P<r_took>\d+\.\d+)$'
)

# Query message (optionally prefixed with "(LONG) "): operation keyword,
# completed/failed status, master/slave, host/database, an optional rc value
# and the duration (literal '.' as decimal separator).
q_query_re = re.compile(
    r'(?:\(LONG\) )?Q: (?P<q_op>[A-Z]+) .*: (?P<q_status>completed|failed) at (?P<q_state>master|slave) '
    r'(?P<q_host>[^\/]+)\/(?P<q_db>[^,;]+)(?:, rc=(?P<q_rc>\d+))?; took (?P<q_took>\d+\.\d+)'
)

# Read log lines from stdin and fold every recognised HTTP-client and query
# message into the module-level accumulators.
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue

    # Dispatch on the logging class first; the message field is extracted
    # only in the two branches that actually parse it.
    java_class = tskv_get_key(line, 'class')

    if java_class == 'r.y.mi.io.http.apache.v4.ApacheHttpClient4Utils':
        message = tskv_get_key(line, 'message')
        request_matches = http_re.search(message)
        if not request_matches:
            # Non-matching messages of this class are skipped without being
            # counted as parse errors.
            continue

        r_type = request_matches.group('r_type').lower()
        # Dots are replaced so the host forms a single component of the key.
        r_host = request_matches.group('r_host').replace('.', '_')
        r_status = request_matches.group('r_status')
        r_status_code = request_matches.group('r_status_code')
        r_took = request_matches.group('r_took')

        host_type = gen_key(r_host, r_type)
        results_timings_request[host_type][r_took] += 1
        results_count_request_status[gen_key(host_type, r_status)] += 1
        results_timings_aggr_request[gen_key(r_host, 'total')][r_took] += 1
        results_count_aggr_request_status[gen_key(r_host, 'total', r_status)] += 1

        # 'failed-404' counts failed requests excluding those with code 404;
        # a failed request with no status code in the message is counted too.
        if r_status == 'failed' and r_status_code != '404':
            results_count_request_status[gen_key(host_type, 'failed-404')] += 1
            results_count_aggr_request_status[gen_key(r_host, 'total', 'failed-404')] += 1

    elif java_class == 'r.y.ch.jdbc.q':
        message = tskv_get_key(line, 'message')
        q_query_matches = q_query_re.search(message)
        if not q_query_matches:
            parse_errors += 1
            continue

        q_op = q_query_matches.group('q_op').lower()
        q_status = q_query_matches.group('q_status')
        q_took = q_query_matches.group('q_took')

        # Per-operation metrics are keyed by the lower-cased operation keyword.
        meter = q_op

        results_timings_query[meter][q_took] += 1
        results_count_query_status[gen_key(meter, q_status)] += 1

        # Coarse grouping of operations for the aggregated metrics.
        if q_op == 'select':  # mb inaccuracy
            q_type = 'read'
        elif q_op in ('begin', 'commit', 'rollback'):
            q_type = 'transproc'
        else:
            q_type = 'write'

        # Every query is counted once in its group and once in 'total'.
        for meter in (q_type, 'total'):
            results_timings_aggr_query[meter][q_took] += 1
            results_count_aggr_query_status[gen_key(meter, q_status)] += 1


def print_status(name, result):
    """Print one ``<name>_<key> <value>`` line per entry of ``result``, ordered by key."""
    for key in sorted(result):
        metric = gen_key(name, key)
        print("%s %s" % (metric, result[key]))


def print_timings(name, result):
    """Print one ``@<name>_<key> <timing>@<count> ...`` line per non-empty entry.

    Entries are emitted in key order; within a line the ``timing@count``
    pairs follow the sorted order of the timing keys.
    """
    for key in sorted(result):
        histogram = result[key]
        if not histogram:
            # Nothing was recorded for this key, so no line is emitted.
            continue
        pairs = [
            "%s@%s" % (timing, count)
            for timing, count in sorted(histogram.items())
        ]
        print("@%s %s" % (gen_key(name, key), " ".join(pairs)))


# Emit the collected metrics: query (pg) metrics first, then HTTP request
# metrics. The order of the rows below is the order of the output.
for emit, metric_name, collected in (
    # pg requests
    (print_status, 'query_count_status', results_count_query_status),
    (print_timings, 'query_timings', results_timings_query),
    (print_status, 'query_count_aggr_status', results_count_aggr_query_status),
    (print_timings, 'query_timings_aggr', results_timings_aggr_query),
    # http requests
    (print_status, 'request_count_status', results_count_request_status),
    (print_status, 'request_count_aggr_status', results_count_aggr_request_status),
    (print_timings, 'request_timings', results_timings_request),
    (print_timings, 'request_timings_aggr', results_timings_aggr_request),
):
    emit(metric_name, collected)
