#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import os
import re
import sys
from pprint import pprint
from collections import defaultdict


# Error counters reported at the end of the run.
results_errors = {'parse': 0}

# Prefix used in the name of every metric line printed by this script.
s_name = 'office'

# Meter name -> list of "METHOD /path" request signatures counted under it.
meters_urls = {
    'post_v1_disk_wopi_files': ['POST /v1/disk/wopi/files'],
    'get_v1_disk_wopi_files': ['GET /v1/disk/wopi/files'],
}

# Reverse lookup: request signature -> meter name.
urls_meters = {}
for meter_name, signatures in meters_urls.items():
    urls_meters.update(dict.fromkeys(signatures, meter_name))

# Pseudo-meters that own no URLs; registered only after the reverse lookup
# is built, because their value (None) cannot be iterated.
meters_urls.update({'others': None, 'total': None})

# Meters for which request timings are collected.
timings_urls_all = [
    'post_v1_disk_wopi_files',
    'get_v1_disk_wopi_files',
    'others',
    'total',
]

# Meters for which status codes are counted.
count_urls_codes = [
    'post_v1_disk_wopi_files',
    'get_v1_disk_wopi_files',
    'others',
    'total',
]


# Per-meter counters keyed by the exact HTTP status code.
results_count_urls_codes = {
    mu_key: defaultdict(int) for mu_key in count_urls_codes
}

# Per-meter counters keyed by status class ('1xx' ... '5xx', 'unknown',
# 'total'); filled by the aggregation step after all input is read.
results_count_urls_acodes = {
    mu_key: defaultdict(int) for mu_key in count_urls_codes
}

# Request timings collected per meter, plus the concatenation of all of them.
results_timings_urls_all = defaultdict(list)
results_timings_urls_total = []

# Matches requests whose path starts with /v1/disk/wopi/files and captures
# that prefix without the leading slash. Written as a raw string: the pattern
# is unchanged, but '\/' in a normal string literal is an invalid escape
# sequence that newer Python versions warn about.
url_re = re.compile(r'^\/(v1\/disk\/wopi\/files)')


# Read log lines from stdin. Each line is split on tabs into key=value
# fields; the fields 'request', 'method', 'request_time' and 'status' are
# used to attribute the line to a meter.
for line in sys.stdin:

    # partition() splits on the first '=' like the former find()-based code,
    # but a field without '=' keeps its full text as the key (empty value)
    # instead of silently losing its last character.
    parsed = {}
    for part in line.strip("\n").split("\t"):
        key, _sep, value = part.partition('=')
        parsed[key] = value

    # A blank line, a line missing a required field, or a non-numeric status
    # is counted as a parse error instead of aborting the whole run.
    # All fields are validated before any counter is touched so a bad line
    # never leaves a timing recorded without its status code.
    try:
        request = parsed['request']
        method = parsed['method']
        request_time = parsed['request_time']
        u_status = int(parsed['status'])
    except (KeyError, ValueError):
        results_errors['parse'] += 1
        continue

    # Requests outside the metered URL prefix are counted as parse errors.
    url_matches = url_re.findall(request)
    if not url_matches:
        results_errors['parse'] += 1
        continue

    # Build the "METHOD /path" signature and map it to its meter; signatures
    # without a dedicated meter fall into 'others'.
    b_request_url = method + ' /' + url_matches[0]
    u_request_url = urls_meters.get(b_request_url, 'others')

    if u_request_url in timings_urls_all:
        # Timings are kept as the raw strings read from the log.
        results_timings_urls_all[u_request_url].append(request_time)

    if u_request_url in count_urls_codes:
        results_count_urls_codes[u_request_url][u_status] += 1

# aggregate: fold the exact status codes of every meter into status classes
# and accumulate the same numbers into the 'total' pseudo-meter.
for u_request_url, result in sorted(results_count_urls_codes.items()):
    for u_status_k, u_status_v in sorted(result.items()):
        if u_status_k < 200:
            # NOTE: every code below 200 (including values under 100) is
            # reported as 1xx, as before.
            u_status_class = '1xx'
        elif u_status_k < 300:
            u_status_class = '2xx'
        elif u_status_k < 400:
            u_status_class = '3xx'
        elif u_status_k < 500:
            u_status_class = '4xx'
        elif u_status_k < 600:
            u_status_class = '5xx'
        else:
            u_status_class = 'unknown'

        # Everything goes into the aggregated table. The 'unknown' class used
        # to be written into results_count_urls_codes by mistake, which added
        # a string key to the per-code table (breaking its int-keyed sort and
        # output) and left the per-meter 'unknown' class empty.
        for u_bucket in (u_status_class, 'total'):
            results_count_urls_acodes[u_request_url][u_bucket] += u_status_v
            results_count_urls_acodes['total'][u_bucket] += u_status_v

    
# Exact status-code counts, one line per (meter, code), ordered by meter
# name and then by code.
code_line = "%s_count_request_%s_code_%s %d"
for meter_name in sorted(results_count_urls_codes):
    per_code = results_count_urls_codes[meter_name]
    for code in sorted(per_code):
        print(code_line % (s_name, meter_name, code, per_code[code]))


# Aggregated status-class counts, one line per (meter, class), in the same
# ordering.
acode_line = "%s_count_request_%s_acode_%s %d"
for meter_name in sorted(results_count_urls_acodes):
    per_class = results_count_urls_acodes[meter_name]
    for status_class in sorted(per_class):
        print(acode_line % (s_name, meter_name, status_class, per_class[status_class]))


# Timings per meter: one line with all collected request times, space
# separated, in meter-name order. Every meter's timings are also appended to
# the overall list printed afterwards.
# The loop body is indented with spaces only; the previous tab/space mix is
# rejected by Python 3 with a TabError.
for u_request_url, timings in sorted(results_timings_urls_all.items()):
    if timings:
        results_timings_urls_total.extend(timings)
        print("%s_timings_request_%s %s" % (s_name, u_request_url, ' '.join(timings)))

# Overall timings line, emitted only when at least one timing was collected.
if results_timings_urls_total:
    print("%s_timings_%s %s" % (s_name, 'total', ' '.join(results_timings_urls_total)))


# Report every error counter, ordered by counter name.
for error_name in sorted(results_errors):
    print("%s_error_%s %d" % (s_name, error_name, results_errors[error_name]))

# Always finish with a success exit status.
sys.exit(0)

