#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import os
import re
import sys

# ---------------------------------------------------------------------------
# Static lookup tables and zero-initialised accumulators.
# ---------------------------------------------------------------------------

# Counter for input lines that could not be parsed.
results_errors = {'parse': 0}

# Status-code bucket names: one per hundred-range, a catch-all and a total.
meters_codes = [
    'code_1xx',
    'code_2xx',
    'code_3xx',
    'code_4xx',
    'code_5xx',
    'code_unknown',
    'code_all',
]

# Per-bucket totals summed over every request group.
results_count_codes = dict.fromkeys(meters_codes, 0)

# Cache-outcome bucket names.
caches_codes = [
    'cache_hit',
    'cache_miss',
    'cache_all',
]

# Per-outcome totals summed over every request group.
results_count_caches = dict.fromkeys(caches_codes, 0)

service = 'squid'

# Request group name -> log tokens that belong to that group.
meters_urls = {
    'get': ['GET'],
    'connect': ['CONNECT'],
    'none': ['NONE'],
}

# Reverse lookup: log token -> request group name.  Built before 'others' is
# registered below, because that group has no token list to iterate.
urls_meters = dict(
    (token, group)
    for group, tokens in meters_urls.items()
    for token in tokens
)

# Fallback group for tokens that are not listed above.
meters_urls['others'] = None

# Cache outcome name -> status fragments that indicate that outcome.
meters_caches = {
    'miss': ['MISS'],
    'hit': ['HIT'],
}

# Reverse lookup: status fragment -> cache outcome name.
caches_meters = dict(
    (fragment, outcome)
    for outcome, fragments in meters_caches.items()
    for fragment in fragments
)

# Per-group accumulators.  Every group (including 'others') gets its own
# timing list and its own zeroed counter dicts, so no state is shared.
results_timings_urls = dict((group, []) for group in meters_urls)
results_count_urls = dict(
    (group, dict.fromkeys(meters_codes, 0)) for group in meters_urls
)
results_caches_urls = dict(
    (group, dict.fromkeys(caches_codes, 0)) for group in meters_urls
)



# Layout of one accepted log line.  Captured groups, in order:
#   1. the number that follows the timestamp (kept as the request timing;
#      presumably a duration -- confirm against the logformat in use),
#   2. the result status in front of the slash,
#   3. the numeric code after the slash,
#   4. the upper-case token after the next number (GET, CONNECT and NONE are
#      the ones mapped to a group in meters_urls).
# Raw strings keep the regex escapes (\d, \.) away from the string-literal
# parser, which warns about unknown escapes on recent Python versions.
index_re = re.compile(
    r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} +(\d+) '
    r'(?:\d+\.\d+\.\d+\.\d+|[0-9a-fA-F:]+) ([A-Z_]+)/(\d+) \d+ ([A-Z_]+) '
)


def _code_bucket(code):
    """Return the meters_codes key that the numeric status `code` falls into.

    Codes from 100 to 599 map to 'code_1xx' .. 'code_5xx'.  Anything else,
    including codes below 100 such as 0, is reported as 'code_unknown'
    instead of being folded into the 1xx bucket.
    """
    if 100 <= code < 600:
        return 'code_%dxx' % (code // 100)
    return 'code_unknown'


# Consume the log from stdin and update the per-group accumulators in place.
for line in sys.stdin:
    # The pattern is anchored with '^', so a single match() is sufficient.
    match = index_re.match(line.strip())
    if match is None:
        results_errors['parse'] += 1
        continue

    u_time, u_status, u_code, b_url = match.groups()

    if b_url in urls_meters:
        u_url = urls_meters[b_url]
        # Timings are recorded only for explicitly mapped groups; the
        # 'others' timing list is never filled.
        # NOTE(review): confirm that skipping 'others' here is intentional.
        results_timings_urls[u_url].append(u_time)
    else:
        u_url = 'others'

    # The status is a '_'-separated compound; every fragment that names a
    # cache outcome bumps that outcome and the per-group cache total.
    url_caches = results_caches_urls[u_url]
    for p_status in u_status.split('_'):
        if p_status in caches_meters:
            url_caches['cache_' + caches_meters[p_status]] += 1
            url_caches['cache_all'] += 1

    url_codes = results_count_urls[u_url]
    url_codes[_code_bucket(int(u_code))] += 1
    url_codes['code_all'] += 1

##

# Emit the collected metrics on stdout, one "name value" pair per line.
# Every section is sorted so the output order is stable between runs.

# Status-code counters per request group; the cross-group totals are summed
# while the per-group values are printed.
for u_url, result in sorted(results_count_urls.items()):
    for u_code, value in sorted(result.items()):
        results_count_codes[u_code] += value
        print("squid_count_request_%s_%s %d" % (u_url, u_code, value))

for u_code, value in sorted(results_count_codes.items()):
    print("squid_count_request_total_%s %d" % (u_code, value))

# Cache-outcome counters per request group, followed by their totals.
for u_url, result in sorted(results_caches_urls.items()):
    for u_cache, value in sorted(result.items()):
        results_count_caches[u_cache] += value
        print("squid_count_request_%s_%s %d" % (u_url, u_cache, value))

# The raw dump of results_count_caches that used to be printed here was a
# debugging leftover: it put a dict repr between the metric lines and was a
# print statement, which is a syntax error on Python 3.

for u_cache, value in sorted(results_count_caches.items()):
    print("squid_count_request_total_%s %d" % (u_cache, value))

# Collected timings, space separated; groups without samples are skipped.
for u_url, result in sorted(results_timings_urls.items()):
    if result:
        print("squid_timings_request_%s %s" % (u_url, ' '.join(result)))

# Error counters (currently only the number of unparsable lines).
for error, value in sorted(results_errors.items()):
    print("squid_count_access_error_%s %d" % (error, value))

sys.exit(0)

