#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import sys
import re
from collections import defaultdict, Counter


# Number of query-log records whose message the query regex failed to match.
errors_parse_line = 0
# Query counts keyed by "<cluster>_<meter>_<operation>_<status>".
results_count_query_status = Counter()
# Query counts keyed by "<cluster>_<meter>_<query type or 'total'>_<status>".
results_aggr_qtype_count_query_status = Counter()
# For each "<cluster>_<meter>_<operation>" key: took-value -> occurrences.
results_timings_query = defaultdict(Counter)
# For each "<cluster>_<meter>_<query type or 'total'>" key: took-value -> occurrences.
results_aggr_qtype_timings_query = defaultdict(Counter)


# Sample input record. In the log it is a single line of tab-separated
# key=value fields; it is wrapped here for readability:
#tskv    tskv_format=ydisk-dataapi-log   logtime=2016-08-18 16:30:46,125 logtimezone=+0300
#unixtime=1471527046     level=DEBUG     thread=qtp614785733-218
#ycrid=rest-1483b026db9558e4f2290cd7a57dee0f-api06g      rid=YipQKbZq    class=r.y.ch.jdbc.q
#message=Q: /* uid: 123456789 */ SELECT * FROM databases_093 WHERE (app = 'profile') AND
#(user_id = '43969629') AND (dbId = 'addresses'): completed at master apidb01d.disk.yandex.net/api_disk_data, rc=1;
#took 0.001

# Parses the `message` field of a query-log record. Named groups:
#   q_op      - upper-case word that starts the statement (SELECT, INSERT, ...)
#   q_status  - "completed" or "failed"
#   q_state   - "master" or "slave"
#   q_host    - host part preceding the "/"
#   q_cluster - leading non-digit prefix of q_host plus the digits that follow
#   q_db      - text after the "/" up to the next "," or ";"
#   q_rc      - optional integer after "rc=" (None when absent)
#   q_took    - decimal number after "took"
# Raw strings keep the regex escapes (\(, \/, \d, ...) away from Python's own
# string-escape processing. The dot in q_took is escaped so that it matches a
# literal "." rather than any character.
q_query_re = re.compile(
    r'(?:\(LONG\) )?Q: (?:\/\*[^*]+\*\/ )?(?P<q_op>[A-Z]+) .*: '
    r'(?P<q_status>completed|failed) at (?P<q_state>master|slave) '
    r'(?P<q_host>(?P<q_cluster>[^\d]+\d+)[^\/]+)\/(?P<q_db>[^,;]+)(?:, rc=(?P<q_rc>\d+))?; '
    r'took (?P<q_took>\d+\.\d+)'
)

def tskv_get_key(line, key):
    """Return the value of field ``key`` from a tab-separated key=value line.

    The value runs from just after ``key=`` up to the next tab, or to the end
    of the line (a trailing newline, if any, is kept). An empty string is
    returned when the field is not present.
    """
    marker = '\t%s=' % key
    key_index = line.find(marker)
    if key_index != -1:
        value_start = key_index + len(marker)
    elif line.startswith(marker[1:]):
        # The first field of a line has no preceding tab.
        value_start = len(marker) - 1
    else:
        # find() returned -1; slicing from that offset would hand back an
        # unrelated fragment of the line instead of signalling "missing".
        return ''
    return line[value_start:].split('\t', 1)[0]


def gen_key(*args):
    """Build a metric key by joining the given string parts with underscores."""
    separator = '_'
    return separator.join(args)


# Aggregate per-query counters and timing histograms from the log on stdin.
for line in sys.stdin:
    # Only query-log records are of interest. Blank lines cannot contain the
    # marker, so this single check also skips them.
    if '\tclass=r.y.ch.jdbc.q' not in line:
        continue

    message = tskv_get_key(line, 'message')

    q_query_matches = q_query_re.search(message)
    if not q_query_matches:
        errors_parse_line += 1
        continue

    q_op = q_query_matches.group('q_op').lower()
    q_status = q_query_matches.group('q_status')
    q_state = q_query_matches.group('q_state')
    # Dots are replaced so the host can be embedded in "_"-joined metric keys.
    q_host = q_query_matches.group('q_host').replace('.', '_')
    q_took = q_query_matches.group('q_took')
    q_cluster = q_query_matches.group('q_cluster')

    # For hosts whose normalized name contains "_db_yandex_net", the database
    # name is used as the cluster, and its last "_"-separated piece is
    # prepended to the host as "mdb<piece>_".
    if '_db_yandex_net' in q_host:
        q_cluster = q_query_matches.group('q_db')
        shard = q_cluster.rsplit('_', 1)[-1]
        q_host = 'mdb%s_%s' % (shard, q_host)

    # Coarse query type: SELECT is a read, transaction control statements get
    # their own bucket, everything else counts as a write.
    if q_op == 'select':
        q_type = 'read'
    elif q_op in ('begin', 'commit', 'rollback'):
        q_type = 'transproc'
    else:
        q_type = 'write'

    # Every query is accounted three times: by master/slave state, by host,
    # and in the cluster-wide 'total'.
    for meter in [q_state, q_host, 'total']:
        key_prefix = gen_key(q_cluster, meter)
        results_count_query_status[gen_key(key_prefix, q_op, q_status)] += 1
        results_aggr_qtype_count_query_status[gen_key(key_prefix, q_type, q_status)] += 1
        results_aggr_qtype_count_query_status[gen_key(key_prefix, 'total', q_status)] += 1

        # Timings are kept as a histogram: took-value string -> occurrences.
        results_timings_query[gen_key(key_prefix, q_op)][q_took] += 1
        results_aggr_qtype_timings_query[gen_key(key_prefix, q_type)][q_took] += 1
        results_aggr_qtype_timings_query[gen_key(key_prefix, 'total')][q_took] += 1


def print_query_status_counters(name, result):
    """Print one "<name>_<key> <value>" line per entry of ``result``, ordered by key."""
    for counter_key in sorted(result):
        metric = gen_key(name, counter_key)
        print("%s %s" % (metric, result[counter_key]))


def print_query_timings(name, result):
    """Print one "@<name>_<key> <took>@<count> ..." line per non-empty entry.

    Entries are ordered by key; within a line the ``took@count`` pairs are
    ordered by the took value as stored (string order for string keys).
    """
    for timing_key in sorted(result):
        histogram = result[timing_key]
        if not histogram:
            continue
        pairs = ["%s@%s" % (took, hits) for took, hits in sorted(histogram.items())]
        print("@%s %s" % (gen_key(name, timing_key), " ".join(pairs)))


# Emit the status counters first, then the timing histograms, and finally the
# number of records that could not be parsed.
for metric_name, counters in (
    ('query_status', results_count_query_status),
    ('query_aggr_qtype_status', results_aggr_qtype_count_query_status),
):
    print_query_status_counters(metric_name, counters)

for metric_name, histograms in (
    ('query_timings', results_timings_query),
    ('query_aggr_qtype_timings', results_aggr_qtype_timings_query),
):
    print_query_timings(metric_name, histograms)

print('errors_parse_line %s' % (errors_parse_line,))
