#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import re
import sys
from collections import Counter, defaultdict


def print_timings(name, data):
    """Print one histogram line per non-empty entry of ``data``.

    ``data`` maps a key to a mapping of ``value -> count``. Keys are emitted
    in sorted order; entries whose mapping is empty are skipped. Each line
    has the form ``@<name>_<key> <value>@<count> <value>@<count> ...`` with
    the pairs sorted as well.
    """
    for key in sorted(data):
        histogram = data[key]
        if not histogram:
            continue
        pairs = ["%s@%s" % (value, hits) for value, hits in sorted(histogram.items())]
        print("@{}_{} {}".format(name, key, ' '.join(pairs)))


def print_counter(name, data):
    """Print ``<name>_<key> <value>`` for every entry of ``data``, in key order."""
    for label in sorted(data):
        print("{}_{} {}".format(name, label, data[label]))


# Event counters, keyed by an underscore-joined label
# (e.g. operation/host, optional shard, and completion status).
results_count_query_status = Counter()
results_count_aggr_query_status = Counter()
results_count_request_status = Counter()
results_count_aggr_request_status = Counter()
results_count_request_status_code = Counter()

# Timing histograms: label -> Counter mapping the "took" string to its
# number of occurrences.
results_timings_query = defaultdict(Counter)
results_timings_aggr_query = defaultdict(Counter)
results_timings_request = defaultdict(Counter)
results_timings_aggr_request = defaultdict(Counter)


# Sample HTTP-client log line (message field) parsed by http_re:
#
# tskv   tskv_format=ydisk-dataapi-log   logtime=2016-06-17 15:53:19,931 logtimezone=+0300   unixtime=1466167999
# level=INFO  thread=pool-4-thread-29 ycrid=rest-5b41d683b5bbfd9f6ce3157bc9c5b1c8-api05h  rid=uIS5DZ0Q
# class=r.y.mi.io.http.apache.v4.ApacheHttpClient4Utils
# message=HTTP POST http://push.yandex.ru/v1/send?uid=390713035&stoken=74a50d7bec15d12d8ce8fd0e82b0a21b025f5853
# &event=datasync_database_changed&tags=.ext.maps_common_ytransportbookmarks%2Capp_.ext.maps_common_ytransportbookmarks
# completed successfully; took 0.027
#
# Captured groups: r_type (HTTP method), r_host, r_status (completed|failed),
# r_status_code (optional three-digit code) and r_took (duration).
# The decimal point in r_took is escaped so that only "<digits>.<digits>" is
# accepted; an unescaped "." would also match a plain digit run like "1234".

http_re = re.compile(
    r'HTTP (?P<r_type>[A-Z]+) https?:\/\/(?P<r_host>[^\/]+)\/.* '
    r'(?P<r_status>completed|failed)([^;]+?)?(?:code (?P<r_status_code>\d{3}))?; took (?P<r_took>\d+\.\d+)$'
)

# Sample JDBC query log line (message field) parsed by q_query_re:
#
# tskv    tskv_format=ydisk-dataapi-log   logtime=2016-08-18 16:30:46,125 logtimezone=+0300       unixtime=1471527046
# level=DEBUG     thread=qtp614785733-218 ycrid=rest-1483b026db9558e4f2290cd7a57dee0f-api06g      rid=YipQKbZq
# class=r.y.ch.jdbc.q  message=Q: SELECT * FROM databases_093 WHERE (app = 'profile') AND (user_id = '43969629')
# AND (dbId = 'addresses'): completed at master apidb01d.disk.yandex.net/api_disk_data, rc=1; took 0.001
#
# Captured groups: q_op (first SQL keyword), q_status (completed|failed),
# q_state (master|slave), q_host, q_db, q_rc (optional) and q_took (duration).
# The decimal point in q_took is escaped so that only "<digits>.<digits>" is
# accepted; an unescaped "." would also match a plain digit run like "1234".

q_query_re = re.compile(
    r'(?:\(LONG\) )?Q: (?:/\*.*?\*/ )?(?P<q_op>[A-Z]+) .*: (?P<q_status>completed|failed) at (?P<q_state>master|slave) '
    r'(?P<q_host>[^\/]+)\/(?P<q_db>[^,;]+)(?:, rc=(?P<q_rc>\d+))?; took (?P<q_took>\d+\.\d+)'
)


def _tskv_field(line, name):
    """Return the value of the tab-separated field ``name`` in ``line``.

    The value is the text between the ``<TAB><name>=`` marker and the next
    TAB (or the end of the line). Returns None when the marker is absent,
    so callers never slice the line at a bogus offset.
    """
    marker = '\t' + name + '='
    start = line.find(marker)
    if start == -1:
        return None
    return line[start + len(marker):].split('\t')[0]


# Read log lines from stdin and accumulate per-request / per-query statistics
# into the module-level result containers.
for line in sys.stdin:
    if not line.strip():
        continue

    class_field = _tskv_field(line, 'class')
    message = _tskv_field(line, 'message')
    # A line without both fields cannot be classified or parsed; skip it.
    if class_field is None or message is None:
        continue

    if class_field == 'r.y.mi.io.http.apache.v4.ApacheHttpClient4Utils':
        # Outgoing HTTP request record.
        request_matches = http_re.search(message)
        if not request_matches:
            continue
        r_type = request_matches.group('r_type').lower()
        # Dots in the host name are replaced so the host fits into a metric key.
        r_host = request_matches.group('r_host').replace('.', '_')
        r_status = request_matches.group('r_status')
        r_status_code = request_matches.group('r_status_code')
        r_took = request_matches.group('r_took')

        # Per host + method statistics.
        results_timings_request['_'.join((r_host, r_type))][r_took] += 1
        results_count_request_status['_'.join((r_host, r_type, r_status))] += 1

        # Per host statistics across all methods.
        results_timings_aggr_request['_'.join((r_host, 'total'))][r_took] += 1
        results_count_aggr_request_status['_'.join((r_host, 'total', r_status))] += 1

        if r_status_code:
            results_count_request_status_code['_'.join((r_host, 'total', r_status_code))] += 1

        # Count failed requests excluding those with a 404 code (failures
        # without any status code are counted here as well).
        if r_status == 'failed' and r_status_code != '404':
            results_count_request_status['_'.join((r_host, r_type, 'failed-404'))] += 1
            results_count_aggr_request_status['_'.join((r_host, 'total', 'failed-404'))] += 1

    elif class_field == 'r.y.ch.jdbc.q':
        # Database query record.
        q_query_matches = q_query_re.search(message)
        if not q_query_matches:
            continue

        q_op = q_query_matches.group('q_op').lower()
        q_status = q_query_matches.group('q_status')
        q_took = q_query_matches.group('q_took')
        q_host = q_query_matches.group('q_host')
        q_db = q_query_matches.group('q_db')
        # Shard label: the database name for hosts ending in 'db.yandex.net',
        # otherwise the first host-name label without its last character
        # (e.g. 'apidb01d' -> 'apidb01').
        if q_host.endswith('db.yandex.net'):
            shard = q_db
        else:
            shard = q_host.split('.', 1)[0][:-1]

        # Per operation statistics, globally and per shard.
        results_timings_query[q_op][q_took] += 1
        results_count_query_status['_'.join((q_op, q_status))] += 1
        results_timings_query['_'.join((shard, q_op))][q_took] += 1
        results_count_query_status['_'.join((shard, q_op, q_status))] += 1

        # Coarse classification by the leading SQL keyword; this may be
        # inaccurate because only the first keyword is inspected.
        if q_op == 'select':
            q_type = 'read'
        elif q_op in ['begin', 'commit', 'rollback']:
            q_type = 'transproc'
        else:
            q_type = 'write'

        # Aggregated statistics for the query class and for the grand total,
        # globally and per shard.
        for meter in [q_type, 'total']:
            results_timings_aggr_query[meter][q_took] += 1
            results_count_aggr_query_status['_'.join((meter, q_status))] += 1
            results_timings_aggr_query['_'.join((shard, meter))][q_took] += 1
            results_count_aggr_query_status['_'.join((shard, meter, q_status))] += 1


# Emit every collected metric; the order of the table is the output order.
for emit, label, collected in (
    (print_counter, 'query_count_status', results_count_query_status),
    (print_timings, 'query_timings', results_timings_query),
    (print_counter, 'query_count_aggr_status', results_count_aggr_query_status),
    (print_timings, 'query_timings_aggr', results_timings_aggr_query),
    (print_counter, 'request_count_status', results_count_request_status),
    (print_counter, 'request_count_aggr_status', results_count_aggr_request_status),
    (print_timings, 'request_timings', results_timings_request),
    (print_timings, 'request_timings_aggr', results_timings_aggr_request),
    (print_counter, 'request_count_status_code', results_count_request_status_code),
):
    emit(label, collected)
