#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import sys
import re
from collections import defaultdict, Counter
from argparse import ArgumentParser


def metric_name(*args):
    """Build a metric name by joining the given string parts with ``_``.

    Empty parts are kept, so ``metric_name('', 'a')`` yields ``'_a'``.
    """
    separator = "_"
    return separator.join(args)

def print_timings(name, data):
    """Print one ``@<name>_<query> <timing>@<count> ...`` line per query.

    ``data`` maps a query key to a mapping of timing -> count. Queries with
    no timings are skipped. Within a line the ``timing@count`` pairs are
    ordered by sorting the (timing, count) items.
    """
    for query, timings in data.items():
        if not timings:
            continue
        pairs = ["%s@%s" % item for item in sorted(timings.items())]
        print("@{}_{} {}".format(name, query, ' '.join(pairs)))


def print_counter(name, data):
    """Print ``<name>_<key> <value>`` for every entry of ``data``, sorted by key."""
    for shard in sorted(data):
        line = "{}_{} {}".format(name, shard, data[shard])
        print(line)


def print_dict_counter(name, data):
    """Print ``<name>_<key>_<subkey> <value>`` for a two-level mapping.

    Outer keys are emitted in sorted order, and so are the sub-keys of each
    inner mapping.
    """
    for shard in sorted(data):
        inner = data[shard]
        for label in sorted(inner):
            print("{}_{}_{} {}".format(name, shard, label, inner[label]))


# Module-level accumulators filled by parse_line() and dumped by print_results().

# Lines that could not be parsed, keyed by the parsing stage that rejected them.
results_errors = Counter()

# Aggregated statistics, collected regardless of the `detailed` flag.
results_aggr_count_mongo300 = Counter()           # queries whose duration was >= 0.3
results_aggr_count_status = defaultdict(Counter)  # key -> status -> count
results_aggr_timings = defaultdict(Counter)       # key -> duration -> count

# Aggregated statistics that are only updated in detailed mode.
results_aggr_count_nret = Counter()
results_aggr_count_size = Counter()
results_aggr_count_rpref = defaultdict(Counter)   # key -> read preference -> count

# Per-operation statistics, only updated in detailed mode.
results_count_nret = Counter()
results_count_size = Counter()
results_count_rpref = defaultdict(Counter)        # key -> read preference -> count
results_count_status = defaultdict(Counter)       # key -> status -> count
results_timings = defaultdict(Counter)            # key -> duration -> count

# root@mworker07g.disk.yandex.net: tskv tskv_format=ydisk-mpfs-requests-log host=mworker07g.disk.yandex.net name=mpfs.requests  appname=queue2.Worker-352   unixtime=1484823412 timestamp=2017-01-19 13:56:52,085   timezone=+0300  ycrid=web-b0587e535f7c729698055854bc7fa21e-ufo05h   request_id=0d8a3309-a914-480c-a918-c2285e9b94f5-handle_operation    pid=576060  module=logging  message=completed disk-unit-28.user_data.user_data.find_one(({'_id': '658d110b78aadbaf2449d5fa74967f54', 'uid': '1130000012308703'},), {'slave_ok': False, 'slave_okay': False, 'read_preference': 1}, mode=PRIMARY_PREFERRED).1 0 72 0.001

# Matches a query log message of the form
#   "<status> <shard>.<collection>.<name>.<op>(... <took>"
# where <status> is "completed" or "failed", <shard> is one of "common",
# "common3" or "blockings" (no other shard names are accepted), the third
# dotted component is matched but not captured, and <took> is the trailing
# decimal number at the very end of the message.
query_re = re.compile(r'(?P<q_status>(completed|failed)) (?P<q_shard>common|common3|blockings)\.(?P<q_collection>[\w_\-]+)\.[\w_\-$]+\.(?P<q_op>[\w_\-]+)\(.* (?P<q_took>\d+\.\d+)$')
# Matches the tail of a message:
#   "mode=<RPREF>).<digits>[ socket_time: <float>] <nret> <size> <took>"
# anchored at the end of the message; the socket_time part is optional.
query_read_re = re.compile(r'mode=(?P<q_rpref>[A-Z_]+)\)\.\d+(?: socket_time: (?P<q_socket_time>\d+\.\d+))? (?P<q_nret>\d+) (?P<q_size>\d+) (?P<q_took>\d+\.\d+)$')
# Recognised ycrid prefixes; parse_line() reports any other prefix as 'malformed'.
YCRIDS = {'-', 'andr', 'dav', 'ios', 'lnx', 'mac', 'mpfs', 'public', 'rest', 'sdk', 'web', 'win', 'wp'}

def parse_line(line, detailed=False):
    """Parse one log line and fold it into the module-level result counters.

    A line is skipped silently when it is blank, does not contain
    ``module=logging``, has no usable ``ycrid`` field (absent, equal to ``-``
    or without a ``-`` separator), has no ``message`` field, or when the
    message does not start with ``completed`` or ``failed``.

    A message that starts with a status word but does not match ``query_re``
    is counted in ``results_errors['parse_line']``.

    Args:
        line: raw log line; ``ycrid`` and ``message`` are looked up as
            tab-separated ``key=value`` fields.
        detailed: when true, per-operation statistics (timings, status,
            returned count, size, read preference) are collected as well.
            In this mode a read operation whose message does not match
            ``query_read_re`` is counted in
            ``results_errors['parse_details']`` and contributes to no other
            counter.

    Returns:
        None. All results are accumulated in the ``results_*`` globals.
    """
    line = line.strip()
    if not line or 'module=logging' not in line:
        return

    # str.find() returns -1 when the field is absent; slicing from that
    # offset would pick up an unrelated piece of the line, so bail out.
    ycrid_index = line.find('\tycrid=')
    if ycrid_index == -1:
        return
    ycrid = line[ycrid_index + 7:].split('\t', 1)[0]
    if '-' not in ycrid or ycrid == '-':
        return
    # Only the prefix before the first '-' identifies the client kind.
    ycrid = ycrid.split('-')[0]
    if ycrid.startswith('rest_'):
        ycrid = ycrid[5:]
    if ycrid not in YCRIDS:
        ycrid = 'malformed'

    message_index = line.find('\tmessage=')
    if message_index == -1:
        return
    message = line[message_index + 9:].split('\t', 1)[0]

    # An empty message has no first word; treat it like any other
    # non-query message instead of raising IndexError.
    words = message.split(None, 1)
    if not words or words[0] not in ('completed', 'failed'):
        return

    query_match = query_re.search(message)
    if not query_match:
        results_errors['parse_line'] += 1
        return

    q_status = query_match.group('q_status')
    q_collection = query_match.group('q_collection')
    q_took = query_match.group('q_took')
    q_op = query_match.group('q_op').lower()

    shards = [query_match.group('q_shard')]
    if shards[0].startswith('disk-unit'):
        # Every disk-unit-* shard is additionally accounted under 'unit'.
        shards.append('unit')

    # Defaults used for write operations in detailed mode.
    q_rpref, q_nret, q_size = 'primary', 0, 0

    # q_op has been lowercased above, so the names here must be lowercase
    # too (a mixed-case 'getMore' would never match).
    if q_op in ('find', 'find_one', 'getmore'):
        q_type = 'read'
        if detailed:
            detail_match = query_read_re.search(message)
            if not detail_match:
                results_errors['parse_details'] += 1
                return
            q_rpref = detail_match.group('q_rpref').lower()
            q_nret = int(detail_match.group('q_nret'))
            q_size = int(detail_match.group('q_size'))
    else:
        q_type = 'write'

    # Compare numerically: a string comparison against '0.3' misorders
    # values such as '00.1'. q_took itself stays a string because it is
    # used verbatim as the timing bucket key.
    is_slow = float(q_took) >= 0.3

    for q_shard in shards:
        if detailed:
            # Per-operation key; built from q_op directly so that it is the
            # same for every shard alias.
            op_key = metric_name(q_shard, q_op)
            results_timings[op_key][q_took] += 1
            results_count_status[op_key][q_status] += 1
            results_count_nret[op_key] += q_nret
            results_count_size[op_key] += q_size
            results_count_rpref[op_key][q_rpref] += 1

        for aggr_meter in (q_type, 'total'):
            key = metric_name(q_shard, aggr_meter)
            collection_key = metric_name(q_shard, q_collection, aggr_meter)
            for k in (key, collection_key):
                results_aggr_timings[k][q_took] += 1
                results_aggr_count_status[k][q_status] += 1
                if aggr_meter == 'total':
                    # Status broken down by client kind, totals only.
                    results_aggr_count_status[k][metric_name(q_status, ycrid)] += 1

            if detailed:
                results_aggr_count_nret[key] += q_nret
                results_aggr_count_size[key] += q_size
                results_aggr_count_rpref[key][q_rpref] += 1

        if is_slow:
            results_aggr_count_mongo300[q_shard] += 1
            results_aggr_count_mongo300[metric_name('', ycrid, q_shard)] += 1

def print_results(detailed=False):
    """Print every accumulated counter to stdout.

    The aggregated counters and the error counter are always printed; the
    remaining counters are printed only when ``detailed`` is true.
    """
    print_counter('query_aggr_count_mongo300', results_aggr_count_mongo300)
    print_dict_counter('query_aggr_count_status', results_aggr_count_status)
    print_timings('query_aggr_timings', results_aggr_timings)
    print_counter('error', results_errors)

    if not detailed:
        return

    flat_counters = (
        ('query_aggr_count_nret', results_aggr_count_nret),
        ('query_aggr_count_size', results_aggr_count_size),
        ('query_count_nret', results_count_nret),
        ('query_count_size', results_count_size),
    )
    for label, counter in flat_counters:
        print_counter(label, counter)

    nested_counters = (
        ('query_aggr_count_rpref', results_aggr_count_rpref),
        ('query_count_rpref', results_count_rpref),
        ('query_count_status', results_count_status),
    )
    for label, counter in nested_counters:
        print_dict_counter(label, counter)

    print_timings('query_timings', results_timings)


if __name__ == '__main__':
    cli = ArgumentParser(description='Parse requests.log')
    cli.add_argument('-s', '--short', action='store_false', help='Skip detailed statistics')
    # `--short` is a store_false flag: its value is True by default and
    # becomes False when the flag is given, so it doubles as "detailed".
    detailed = cli.parse_args().short
    for raw_line in sys.stdin:
        parse_line(raw_line, detailed)
    print_results(detailed)
