#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import re
import sys
from collections import Counter, defaultdict


def tskv_get_key(line, key):
    """Return the value of the ``key=value`` field in a tab-separated line.

    The field is looked up at the very start of the line first, then as a
    tab-prefixed field anywhere else in the line.  The value runs up to the
    next tab, or to the end of the line for the last field (a trailing line
    terminator, if any, is left untouched).

    :param line: one TSKV record (fields separated by tabs).
    :param key: field name to look up, without the trailing ``=``.
    :return: the field value as a string, or ``''`` when the key is absent.
    """
    marker = '%s=' % key

    if line.startswith(marker):
        # The first field has no leading tab, so it needs its own check.
        value_start = len(marker)
    else:
        tab_index = line.find('\t' + marker)
        if tab_index == -1:
            # Without this guard the -1 from find() would be used as an
            # offset and an unrelated slice of the line would be returned.
            return ''
        value_start = tab_index + 1 + len(marker)

    value_end = line.find('\t', value_start)
    if value_end == -1:
        return line[value_start:]
    return line[value_start:value_end]


# Accumulators filled while reading the log.  All four share the same shape:
#   host -> operation -> Counter
# where "operation" is the lower-cased HTTP method for the per-method tables
# and the literal 'total' for the aggregated ones.  The nested factory states
# that shape up front instead of declaring an inner type that is replaced as
# soon as the first record for a host is stored.

# Per-method status counters (Counter keys are status labels).
results_count_request_status = defaultdict(lambda: defaultdict(Counter))
# Per-method timing histograms (Counter keys are the "took" strings).
results_timings_request = defaultdict(lambda: defaultdict(Counter))

# Same two tables aggregated over all methods of a host.
results_count_aggr_request_status = defaultdict(lambda: defaultdict(Counter))
results_timings_aggr_request = defaultdict(lambda: defaultdict(Counter))

# Matches HTTP client log messages of the form
#   [HTTP ]<METHOD> http[s]://<host>[/:]... <completed|failed>[... code NNN]; took <sec>.<frac>
# Named groups: r_type (method), r_host, r_status, r_status_code (optional),
# r_took.  The decimal point in r_took is escaped so that only a literal '.'
# is accepted as the separator rather than any character.
http_re = re.compile(
    r'(HTTP )?(?P<r_type>[A-Z]+) https?://(?P<r_host>[^/:]+)[/:].* '
    r'(?P<r_status>completed|failed)([^;]+?)?(code (?P<r_status_code>\d{3}))?; '
    r'took (?P<r_took>\d+\.\d+)$'
)


# Read TSKV log records from stdin and accumulate, per target host, status
# counters and timing histograms for every HTTP client request message.
for line in sys.stdin:
    # Only records of the Apache HTTP client logger are relevant.  Blank
    # lines cannot contain this marker, so they are skipped here as well.
    if '\tclass=r.y.mi.io.http.apache.v4.ApacheHttpClient4' not in line:
        continue

    request_matches = http_re.search(tskv_get_key(line, 'message'))
    if not request_matches:
        continue

    r_type = request_matches.group('r_type').lower()
    # Dots are replaced so the host can be embedded in '_'-joined metric names.
    r_host = request_matches.group('r_host').replace('.', '_')
    r_status = request_matches.group('r_status')
    # NOTE(review): a message without "code NNN" is counted as 200 even when
    # the status is 'failed' -- kept as is; confirm this is intended.
    r_status_code = request_matches.group('r_status_code') or '200'
    # r_status_code is never empty at this point, so its first digit is
    # always available for the "Nxx" class label.
    r_status_code_group = '{}xx'.format(r_status_code[0])
    r_took = request_matches.group('r_took')

    # First record for this host: create its per-operation Counter tables.
    if r_host not in results_count_request_status:
        results_count_request_status[r_host] = defaultdict(Counter)
        results_timings_request[r_host] = defaultdict(Counter)

        results_count_aggr_request_status[r_host] = defaultdict(Counter)
        results_timings_aggr_request[r_host] = defaultdict(Counter)

    # Every request bumps the plain status, the status with the exact code
    # and the status with the code class.
    status_keys = [
        r_status,
        '{}_{}'.format(r_status, r_status_code),
        '{}_{}'.format(r_status, r_status_code_group),
    ]
    # Count failed requests without 404 codes
    if r_status == 'failed' and r_status_code != '404':
        status_keys.append('failed-404')

    # Apply the same updates to the per-method tables and to the 'total'
    # tables aggregated over all methods.
    for counts, timings, op in (
        (results_count_request_status, results_timings_request, r_type),
        (results_count_aggr_request_status, results_timings_aggr_request, 'total'),
    ):
        timings[r_host][op][r_took] += 1
        op_counts = counts[r_host][op]
        for status_key in status_keys:
            op_counts[status_key] += 1


def print_status_counters(name, aggr_result):
    """Print one ``<name>_<host>_<op>_<key> <value>`` line per counter.

    Uses ``items()`` rather than the Python 2-only ``iteritems()`` so the
    function runs on both Python 2 and Python 3.

    :param name: metric name prefix.
    :param aggr_result: mapping host -> operation -> {counter key: value}.
    :return: None; lines are written to stdout, sorted by host, then
        operation, then counter key.
    """
    for host, results in sorted(aggr_result.items()):
        for op, values in sorted(results.items()):
            for val_k, val_v in sorted(values.items()):
                print("%s_%s_%s_%s %s" % (name, host, op, val_k, val_v))


def print_timings(name, result):
    """Print one ``@<name>_<host>_<request> v1@c1 v2@c2 ...`` line per table.

    Each ``v@c`` pair is a timing value and the number of times it was seen.
    Pairs are sorted by their keys as stored, so string timings are ordered
    lexicographically.  Empty tables produce no output.

    Uses ``items()`` rather than the Python 2-only ``iteritems()`` so the
    function runs on both Python 2 and Python 3.

    :param name: metric name prefix.
    :param result: mapping host -> request -> {timing value: count}.
    :return: None; lines are written to stdout, sorted by host, then request.
    """
    for host, results in sorted(result.items()):
        for request, timings in sorted(results.items()):
            if not timings:
                continue
            packed_timings = ["%s@%s" % pair for pair in sorted(timings.items())]
            print("@%s_%s_%s %s" % (name, host, request, " ".join(packed_timings)))


# Emit the report: status counters first (per method, then aggregated),
# followed by the timing histograms in the same order.
for printer, metric_name, table in (
    (print_status_counters, 'request_count_status', results_count_request_status),
    (print_status_counters, 'request_count_aggr_status', results_count_aggr_request_status),
    (print_timings, 'request_timings', results_timings_request),
    (print_timings, 'request_timings_aggr', results_timings_aggr_request),
):
    printer(metric_name, table)
