#!/usr/bin/python
# -*- coding: utf-8 -*-

# делалось для https://st.yandex-team.ru/DIRECT-152492
# принимает на stdin записи из slow-log'а mysql, и считает суммарное время, группируя по типам sql-запросов
# имеет смысл, если запросы на входе уже имеют что-то общее (например, происходят от вызовов одного и того же метода)

import re
import sys

# Patterns are compiled once here instead of being looked up for every line.
_QUERY_TIME_RE = re.compile(r'Query_time: (\S+)')
_SELECT_RE = re.compile(r'select\s+/\* .*? */(.*)')
# NOTE: the leading `.*` is greedy, so the split happens at the LAST
# whitespace-delimited `from` on the line. A subquery that follows the main
# FROM clause therefore shifts the split point into that subquery.
_FROM_RE = re.compile(r'(.*\S)\s+from\s+(.*)')
_JOIN_RE = re.compile(r'\s+join\s+')


def split_fields(fields_str):
    """Return one short name per top-level, comma-separated select expression.

    For every expression only its last whitespace-separated token is kept
    (so ``a.b as c`` becomes ``c``).  Text inside parentheses is kept
    verbatim, including inner whitespace and commas, so ``count(a, b)``
    stays a single token.
    """
    fields = []
    depth = 0            # current parenthesis nesting level
    token = ""
    token_ended = False  # whitespace was seen after `token` at depth 0
    # The extra trailing comma acts as a sentinel that flushes the last field.
    for c in fields_str + ',':
        if c == ',' and depth == 0:
            fields.append(token)
            token = ""
            token_ended = False
            continue
        if depth == 0 and c.isspace():
            # Do not drop the token yet: if a comma follows (``a , b``) the
            # token collected so far is still the name of this field.
            token_ended = True
            continue
        if token_ended:
            # A new token starts after whitespace; only the last one is kept.
            token = ""
            token_ended = False
        token += c
        if c == '(':
            depth += 1
        elif c == ')' and depth > 0:
            # Clamp at zero so a stray ')' cannot make later commas ignored.
            depth -= 1
    return fields


def redact_query(query):
    """Build the grouping key for the text following ``select /* ... */``.

    The key is ``SELECT [<up to two sorted field names, then '...'>] FROM
    [<sorted table names>]``.  Returns None when *query* has no
    whitespace-delimited ``from``.
    """
    m = _FROM_RE.search(query)
    if m is None:
        return None
    fields = sorted(s.replace('`', '') for s in split_fields(m.group(1)))
    # The first word of every join operand is taken as the table name;
    # operands without any word (e.g. an empty FROM clause) are ignored.
    operands = (part.split() for part in _JOIN_RE.split(m.group(2)))
    tables = sorted(words[0].replace('`', '') for words in operands if words)
    if len(fields) > 2:
        fields = fields[0:2] + ['...']
    return " ".join(['SELECT', str(fields), 'FROM', str(tables)])


def parse_line(line):
    """Return ``(query_time_ms, grouping_key)`` for *line*, or None.

    Both the ``Query_time: <seconds>`` marker and the
    ``select /* comment */ ...`` text must be present on the same line.
    None is returned when either is missing or the time is not a number.
    """
    time_match = _QUERY_TIME_RE.search(line)
    select_match = _SELECT_RE.search(line)
    if time_match is None or select_match is None:
        return None
    try:
        query_time = float(time_match.group(1)) * 1000  # convert to milliseconds
    except ValueError:
        return None
    key = redact_query(select_match.group(1))
    if key is None:
        return None
    return query_time, key


def summarize(lines):
    """Aggregate query times over an iterable of input lines.

    Returns a tuple ``(total_time, total_queries, total_time_by_query,
    total_queries_of_kind, skipped)``.  Times are in milliseconds, the two
    dicts are keyed by the grouping key from ``redact_query`` and *skipped*
    counts the non-blank lines that could not be parsed.
    """
    total_time = 0.0
    total_queries = 0
    total_time_by_query = {}
    total_queries_of_kind = {}
    skipped = 0
    for line in lines:
        if not line.strip():
            continue
        parsed = parse_line(line)
        if parsed is None:
            skipped += 1
            continue
        query_time, key = parsed
        total_time += query_time
        total_queries += 1
        total_time_by_query[key] = total_time_by_query.get(key, 0.0) + query_time
        total_queries_of_kind[key] = total_queries_of_kind.get(key, 0) + 1
    return total_time, total_queries, total_time_by_query, total_queries_of_kind, skipped


def format_report(total_time, total_queries, total_time_by_query, total_queries_of_kind):
    """Return the report lines: the grand total, then kinds by time, descending."""
    report = ["%.0f ms / %d queries total" % (total_time, total_queries)]
    for q in sorted(total_time_by_query, key=lambda x: total_time_by_query[x], reverse=True):
        report.append('%.0f ms / %d queries: %s' % (total_time_by_query[q], total_queries_of_kind[q], q))
    return report


def main():
    """Read slow-log lines from stdin and print the per-kind time report."""
    total_time, total_queries, by_time, by_count, skipped = summarize(sys.stdin)
    for report_line in format_report(total_time, total_queries, by_time, by_count):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(report_line)
    if skipped:
        sys.stderr.write("%d input lines could not be parsed and were skipped\n" % skipped)


if __name__ == '__main__':
    main()
