#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import os
import re
import sys
import pprint
from collections import defaultdict


# Counters for input lines that could not be processed, keyed by error kind.
results_errors = dict(parse=0)

# Prefix put in front of every metric name printed by this script.
s_name = 'smartcache-client'

# Meter name -> list of request keys ("METHOD /path") counted under that meter.
meters_urls = {
    'get_smartcache_photoslice-init-snapshot': [
        'GET /smartcache/photoslice-init-snapshot',
    ],
    'get_smartcache_smartcache-deltas-list': [
        'GET /smartcache/smartcache-deltas-list',
    ],
    'get_smartcache_smartcache-snapshot': [
        'GET /smartcache/smartcache-snapshot',
    ],
}

# Reverse lookup: request key -> meter name.
urls_meters = {
    url: meter
    for meter, urls in meters_urls.items()
    for url in urls
}

# Pseudo-meters that have no URL list of their own; they are registered only
# after the reverse lookup has been built so they never take part in it.
meters_urls.update(others=None, total=None)

# Meters whose request times are collected.
timings_urls_all = [
    'get_smartcache_photoslice-init-snapshot',
    'get_smartcache_smartcache-deltas-list',
    'get_smartcache_smartcache-snapshot',
]

# Meters whose status codes are counted: the timed ones plus the pseudo-meters.
count_urls_codes = timings_urls_all + ['others', 'total']

# Status-code counters.
# results_count_urls_codes:  meter -> {exact status code -> hits}
# results_count_urls_acodes: meter -> {status class / 'total' -> hits}
# Every meter listed in count_urls_codes gets its own zero-defaulting counter
# in each table up front.
results_count_urls_codes = {mu_key: defaultdict(int) for mu_key in count_urls_codes}
results_count_urls_acodes = {mu_key: defaultdict(int) for mu_key in count_urls_codes}

# Timings.
# results_timings_urls_all:   meter -> list of request times
# results_timings_urls_total: request times of all meters combined
results_timings_urls_all = defaultdict(list)
results_timings_urls_total = []

# Pattern for the tail of one access-log line. In order it expects:
#   ': "'  METHOD /path  (space or '?')  rest-of-request  '"'
#   optional space, HTTP/x.y, two double-quoted fields,
#   a 3-digit status (b_status),
#   a dotted-quad or hex/colon token (presumably the peer address - TODO confirm),
#   the literal '-1', an integer, and a decimal number (b_request_time) at
#   the very end of the line.
# b_request_url captures "METHOD /path" without any query string.
# Raw string: the pattern contains '\/', '\d' and '\.', which are not valid
# escapes in an ordinary string literal.
index_re = r': "(?P<b_request_url>[A-Z]+ \/[^ ?]+)[ ?][^"]*" ?HTTP\/\d\.\d "[^"]+" "[^"]+" (?P<b_status>\d{3}) (?:\d+\.\d+\.\d+\.\d+|[0-9a-fA-F:]+) -1 \d+ (?P<b_request_time>\d+\.\d+)$'

# Compiled once up front instead of being looked up again for every line.
index_pattern = re.compile(index_re)

# Read log lines from stdin and fill the per-meter counters and timings.
for line in sys.stdin:
    matches = index_pattern.search(line.strip())
    if not matches:
        # Lines that do not fit the expected format are only counted.
        results_errors['parse'] += 1
        continue

    # Requests that are not explicitly metered are accounted under 'others'.
    u_request_url = urls_meters.get(matches.group('b_request_url'), 'others')

    if u_request_url in timings_urls_all:
        # The request time is kept as the matched text; it is only joined
        # back into output lines later on.
        results_timings_urls_all[u_request_url].append(matches.group('b_request_time'))

    if u_request_url in count_urls_codes:
        u_status = int(matches.group('b_status'))
        results_count_urls_codes[u_request_url][u_status] += 1

# Aggregate exact status codes into status classes ('1xx' .. '5xx', 'unknown'),
# both per meter and in the overall 'total' meter, together with a running
# 'total' hit count for each of them.
for u_request_url, result in sorted(results_count_urls_codes.items()):
    for u_status_k, u_status_v in sorted(result.items()):
        if u_status_k < 200:
            u_status_class = '1xx'
        elif u_status_k < 300:
            u_status_class = '2xx'
        elif u_status_k < 400:
            u_status_class = '3xx'
        elif u_status_k < 500:
            u_status_class = '4xx'
        elif u_status_k < 600:
            u_status_class = '5xx'
        else:
            # Codes >= 600. These must be accumulated in the aggregated
            # table: writing an 'unknown' key into results_count_urls_codes
            # would mix a string key into the integer status keys and leave
            # the per-meter 'unknown' class uncounted.
            u_status_class = 'unknown'

        results_count_urls_acodes[u_request_url][u_status_class] += u_status_v
        results_count_urls_acodes['total'][u_status_class] += u_status_v

        results_count_urls_acodes[u_request_url]['total'] += u_status_v
        results_count_urls_acodes['total']['total'] += u_status_v

    
# Output: one line per (meter, exact status code) pair, ordered by meter name
# and then by status code.
for u_request_url in sorted(results_count_urls_codes):
    code_hits = results_count_urls_codes[u_request_url]
    for u_status_k in sorted(code_hits):
        print("%s_count_request_%s_code_%s %d" % (s_name, u_request_url, u_status_k, code_hits[u_status_k]))


# Output: one line per (meter, aggregated status class) pair taken from
# results_count_urls_acodes, ordered by meter name and then by class name.
for u_request_url in sorted(results_count_urls_acodes):
    class_hits = results_count_urls_acodes[u_request_url]
    for u_status_k in sorted(class_hits):
        print("%s_count_request_%s_acode_%s %d" % (s_name, u_request_url, u_status_k, class_hits[u_status_k]))


# Output: the collected request times of each meter as one space-separated
# line, while also gathering them into the combined 'total' series.
# The loop body is indented with spaces only; a tab-indented line next to
# space-indented ones is rejected by Python 3 with a TabError.
for u_request_url, timings in sorted(results_timings_urls_all.items()):
    if timings:
        results_timings_urls_total.extend(timings)
        print("%s_timings_request_%s %s" % (s_name, u_request_url, ' '.join(timings)))

# The combined series is printed only when at least one timing was collected.
if results_timings_urls_total:
    print("%s_timings_%s %s" % (s_name, 'total', ' '.join(results_timings_urls_total)))


# Output: one line per error counter, ordered by error name.
for error in sorted(results_errors):
    print("%s_error_%s %d" % (s_name, error, results_errors[error]))


# Always finish with a success exit status.
sys.exit(0)

