#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import os
import re
import sys
from collections import defaultdict, Counter


def metric_name(*args):
    """Build a metric name by joining the given string parts with underscores."""
    separator = "_"
    return separator.join(args)

def print_timings(name, data):
    """Print one "@<name>_<query> <key>@<count> ..." line per non-empty entry.

    ``data`` maps a query to a mapping of timing key -> count. Entries with an
    empty mapping are skipped; within a line the pairs are sorted.
    """
    for query, timings in data.items():
        if not timings:
            continue
        pairs = ["%s@%s" % pair for pair in sorted(timings.items())]
        print("@{}_{} {}".format(name, query, ' '.join(pairs)))


def print_counter(name, data):
    """Print "<name>_<key> <value>" for every item of ``data``, sorted by key."""
    template = "{}_{} {}"
    for entry in sorted(data.items()):
        print(template.format(name, *entry))


def print_dict_counter(name, data):
    """Print "<name>_<outer key>_<inner key> <value>" for a two-level mapping.

    Outer and inner items are both emitted in sorted order.
    """
    template = "{}_{}_{} {}"
    for shard, results in sorted(data.items()):
        for pair in sorted(results.items()):
            print(template.format(name, shard, *pair))

# Accumulators filled while reading stdin and printed once at the end.
error_parse = 0  # selected lines that could not be parsed
results_timings = defaultdict(Counter)  # host -> {request time string: hits}
results_aggr_count_codes = Counter()  # "<host>_<Nxx>" / "<host>_total" / "total_total" -> hits
results_count_codes = Counter()  # "<host>_<status code>" -> hits

# Sample of the text index_re is matched against; only the tail
# (METHOD "URL" STATUS N N REQUEST_TIME) is captured:
# 2016-01-22 20:32:34,950 [14603] rest-507fdc388405329194a9a166e3eeb8e0-api01h 14603_16997 client GET "http://ratelimiter.disk.yandex.net:1880/cloud_api_user/25799403" 200 0 0 0.003

# Captures the URL host (up to the first "/" or ":"), the 3-digit status and
# the trailing decimal request time. Raw strings keep the regex escapes from
# being parsed as (invalid) string escapes.
index_re = re.compile(
    r'([A-Z]+ )?"https?:\/\/(?P<b_host>[^\/:]+).*" '
    r'(?P<b_status>\d{3}) \d+ \d+ (?P<b_request_time>\d+\.\d+)$'
)

# Hosts whose name starts with one of these numbered prefixes are reported
# under a single shared metric name.
aliases = {
    re.compile(r"^uploader\d+"): "disk_uploader",
    re.compile(r"^webdav\d+"): "disk_webdav",
    re.compile(r"^push\d+"): "disk_push",
    re.compile(r"^xmpp\d+"): "disk_xmpp",
}

# Aggregate per-host status counts and request timings from the log on stdin.
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue

    # Only lines tagged with module=client or module=common_service are counted.
    if '\tmodule=client' not in line and '\tmodule=common_service' not in line:
        continue

    message_index = line.find('\tmessage=')
    if message_index == -1:
        # find() returns -1 when the field is absent; slicing from that offset
        # would feed an arbitrary piece of the line to the regex.
        error_parse += 1
        continue
    # The message value runs from just after the key up to the next tab.
    message = line[message_index + len('\tmessage='):].split('\t')[0]

    matches = index_re.search(message)
    if not matches:
        error_parse += 1
        continue

    b_host = matches.group('b_host')
    # Replace a host matching an alias pattern with its shared alias name.
    # .items() (rather than the Python 2-only .iteritems()) runs on 2 and 3.
    for alias_re, alias in aliases.items():
        if alias_re.match(b_host):
            b_host = alias
            break

    # Metric names use underscores in place of the dots in the host name.
    host = b_host.replace('.', '_')
    u_status = matches.group('b_status')
    # Status class, e.g. "200" -> "2xx".
    acode = u_status[0] + 'xx'
    results_timings[host][matches.group('b_request_time')] += 1
    results_count_codes[metric_name(host, u_status)] += 1
    results_aggr_count_codes[metric_name(host, acode)] += 1
    results_aggr_count_codes[metric_name(host, 'total')] += 1
    results_aggr_count_codes[metric_name('total', 'total')] += 1


# Print everything collected from stdin: exact-status counts, aggregated
# counts, per-host timings, then the number of unparsed lines.
for emit, metric_prefix, collected in (
    (print_counter, "request_count_code", results_count_codes),
    (print_counter, "request_aggr_count_code", results_aggr_count_codes),
    (print_timings, "request_timings", results_timings),
):
    emit(metric_prefix, collected)
print("error_parse {}".format(error_parse))
