#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys
import re
from collections import defaultdict, Counter

# Matches Session messages about transaction control, e.g.
# "begin transaction => OK, took 12 ms".
transaction_re = re.compile(
    r"(?P<event>(begin|commit|rollback) transaction)"
    r" => (?P<status>[A-Za-z]+), took (?P<time>\d+) ms$"
)

# Matches Session messages whose text contains "; <STATEMENT> ..." followed
# by the same "=> <status>, took <N> ms" tail.
request_re = re.compile(
    r";\s+(?P<event>SELECT|INSERT|UPDATE|UPSERT|DELETE).+"
    r" => (?P<status>[A-Za-z]+), took (?P<time>\d+) ms$"
)

# Matches SessionRetryContext summaries, e.g.
# "OK, finished after 2 retries, 30 ms total".
retries_re = re.compile(
    r"(?P<status>OK|ERROR).*, finished after (?P<retries>\d+) retries,"
    r" (?P<time>\d+) ms total$"
)

# event -> Counter(status -> number of matching messages)
request_event_statuses = defaultdict(Counter)
# event (plus the aggregate key 'total') -> Counter(time -> occurrences)
request_event_timings = defaultdict(Counter)

# status -> Counter("<N>_retries" -> number of matching messages)
retries_statuses = defaultdict(Counter)
# status (plus the aggregate key 'total') -> Counter(time -> occurrences)
timings_with_retries = defaultdict(Counter)


def safe_parse_int(number):
    """Convert *number* to an int, falling back to 0 when it cannot be parsed.

    :param number: value to convert, normally a string of digits.
    :return: ``int(number)``, or 0 if the conversion fails.
    """
    try:
        return int(number)
    except (TypeError, ValueError):
        # ValueError: malformed text such as '' or 'abc'.
        # TypeError: non-convertible objects such as None; without this a
        # "safe" parse would still raise on them.
        return 0


def parse_tskv_field(tskv, field_name):
    """Return the value of the first ``<TAB>field_name=`` field in *tskv*.

    The value runs from just after ``=`` up to the next tab, or to the end
    of the string when there is no further tab. No stripping is performed,
    so the last field of a line keeps any trailing newline.

    :param tskv: one tab-separated ``key=value`` record.
    :param field_name: key to look up; it must be preceded by a tab.
    :return: the field value, or ``''`` when the field is not present.
    """
    marker = '\t' + field_name + '='
    start = tskv.find(marker)
    if start == -1:
        # Without this guard the -1 from find() would be used as a real
        # offset and an unrelated fragment of the line would be returned.
        return ''

    start += len(marker)
    end = tskv.find('\t', start)
    if end == -1:
        return tskv[start:]
    return tskv[start:end]


def calculate_request_event_metrics_by_regexp(message, regexp):
    """Search *message* with *regexp* and record request metrics on a match.

    The pattern must expose the named groups ``event``, ``status`` and
    ``time``. On a match the status counter of the event is incremented and
    the time is recorded both under the event and under ``'total'``.

    :param message: log message text to inspect.
    :param regexp: compiled pattern providing the named groups above.
    :return: the match object, or ``None`` when *message* does not match.
    """
    found = regexp.search(message)
    if not found:
        return found

    # Spaces in the event become underscores; the status is upper-cased.
    event_key = found.group('event').replace(' ', '_')
    status_key = found.group('status').upper()
    elapsed = safe_parse_int(found.group('time'))

    request_event_statuses[event_key][status_key] += 1
    for bucket in (event_key, 'total'):
        request_event_timings[bucket][elapsed] += 1

    return found


def calculate_retries_metrics_by_regexp(message, regexp):
    """Search *message* with *regexp* and record retry metrics on a match.

    The pattern must expose the named groups ``status``, ``retries`` and
    ``time``. On a match the ``"<N>_retries"`` counter of the status is
    incremented and the time is recorded both under the status and under
    ``'total'``.

    :param message: log message text to inspect.
    :param regexp: compiled pattern providing the named groups above.
    :return: the match object, or ``None`` when *message* does not match.
    """
    found = regexp.search(message)
    if not found:
        return found

    status_key = found.group('status').upper()
    retries_key = found.group('retries') + '_retries'
    elapsed = safe_parse_int(found.group('time'))

    retries_statuses[status_key][retries_key] += 1
    for bucket in (status_key, 'total'):
        timings_with_retries[bucket][elapsed] += 1

    return found


def print_timings(name, data):
    """Print one ``@<name>_<query> <key>@<value> ...`` line per entry.

    Entries whose inner mapping is empty are skipped. Within a line the
    ``key@value`` pairs are sorted and separated by single spaces.

    :param name: prefix placed after the leading ``@``.
    :param data: mapping of query name to a mapping of key to value.
    """
    for query, timings in data.items():
        if not timings:
            continue
        packed = ' '.join("%s@%s" % pair for pair in sorted(timings.items()))
        print("@{}_{} {}".format(name, query, packed))


def print_counter(name, data):
    """Print one ``<name>_<key> <value>`` line per entry, ordered by key.

    :param name: prefix joined to each key with an underscore.
    :param data: mapping of key to value.
    """
    for key in sorted(data):
        print("{}_{} {}".format(name, key, data[key]))


def print_dict_counter(name, data):
    """Print every inner counter of *data*, outer keys in sorted order.

    Each inner mapping is printed by ``print_counter`` with the prefix
    ``<name>_<outer key>``.

    :param name: common prefix for all printed lines.
    :param data: mapping of outer key to a mapping of key to value.
    """
    for group in sorted(data):
        prefix = name + '_' + group
        print_counter(prefix, data[group])


# Read TSKV records from stdin and accumulate metrics into the module-level
# counters; blank lines are ignored.
lines = sys.stdin
for line in lines:
    if line.strip():
        class_field = parse_tskv_field(line, 'class')
        message = parse_tskv_field(line, 'message')

        if class_field == 'com.yandex.ydb.table.Session':
            # The statement pattern is tried only when the transaction
            # pattern did not match, so a message is counted at most once.
            if not calculate_request_event_metrics_by_regexp(message, transaction_re):
                calculate_request_event_metrics_by_regexp(message, request_re)
        elif class_field == 'com.yandex.ydb.table.SessionRetryContext':
            calculate_retries_metrics_by_regexp(message, retries_re)

# Counters first: per-event statuses, then per-status retry counts.
print_dict_counter('request_count', request_event_statuses)
print_dict_counter('request_count', retries_statuses)

# Then the timing histograms, one "@..." line per non-empty bucket.
print_timings('request_timings', request_event_timings)
print_timings('request_timings_with_retries', timings_with_retries)
