#!/usr/bin/env python

# -*- coding: UTF-8 -*-

import os
import re
import sys
import pprint
from collections import defaultdict


# Error counters reported at the end of the run, keyed by error kind.
results_errors = {'parse': 0}

# Prefix placed in front of every metric name this script prints.
s_name = 'uploader'

# Meter name -> list of "METHOD /first-path-segment" request keys feeding it.
meters_urls = {
    'get_zip-folder': ['GET /zip-folder'],
    'post_upload-from-service': ['POST /upload-from-service'],
    'get_zip-folder-public': ['GET /zip-folder-public'],
    'get_regenerate-preview': ['GET /regenerate-preview'],
    'put_patch-target': ['PUT /patch-target'],
    'post_patch-url': ['POST /patch-url'],
    'get_request-status': ['GET /request-status'],
    'post_upload-url': ['POST /upload-url'],
    'head_upload-target': ['HEAD /upload-target'],
    'put_upload-target': ['PUT /upload-target'],
    'post_upload-target': ['POST /upload-target'],
    'get_generate-preview': ['GET /generate-preview'],
    'loading-status': ['GET /loading-status', 'POST /loading-status'],
}

# Reverse lookup: request key -> meter name.
urls_meters = {
    request_key: meter_name
    for meter_name, request_keys in meters_urls.items()
    for request_key in request_keys
}

# Added only after the reverse map is built: this meter has no request keys.
meters_urls['others'] = None

# Meters whose request time is collected for every matching request.
timings_urls_all = [
    'get_regenerate-preview',
    'post_patch-url',
    'get_request-status',
    'post_upload-url',
    'head_upload-target',
    'get_generate-preview',
]

# Meters whose request time is collected only for requests under the
# 1048576-byte threshold applied by the parsing loop.
timings_urls_lt1mb = [
    'get_zip-folder',
    'post_upload-from-service',
    'get_zip-folder-public',
    'put_patch-target',
    'put_upload-target',
    'post_upload-target',
]

# Meters for which requests are counted per response code.
count_urls_codes = [
    'get_zip-folder',
    'post_upload-from-service',
    'get_zip-folder-public',
    'get_regenerate-preview',
    'put_patch-target',
    'post_patch-url',
    'get_request-status',
    'post_upload-url',
    'head_upload-target',
    'put_upload-target',
    'get_generate-preview',
    'post_upload-target',
]

# meter -> {response code -> request count}; every counted meter is
# pre-registered so it exists even before any request is seen.
results_count_urls_codes = {
    meter_name: defaultdict(int) for meter_name in count_urls_codes
}

# meter -> list of collected request-time values.
results_timings_urls_lt1mb = defaultdict(list)
results_timings_urls_all = defaultdict(list)


#####

# Pattern for the tail of one access-log line (anchored at end of line).
# Capture groups:
#   1. "METHOD /first-path-segment" -- optional; cut at the next '/', '?' or '"'
#   2. three-digit response code
#   3. bytes sent (a leading '-' is accepted)
#   4. request length
#   5. request time, written as digits '.' digits
# The uncaptured field between groups 2 and 3 is a dotted quad or a run of
# hex digits and colons -- presumably a client/upstream address.
# A raw string is used so that '\d' and '\.' reach the regex engine untouched.
index_re = re.compile(
    r': "([A-Z]+ /[^/?"]*)?[/?"].*'
    r' (\d{3})'
    r' (?:\d+\.\d+\.\d+\.\d+|[0-9a-fA-F:]+)'
    r' (-?\d+) (\d+) (\d+\.\d+)$'
)

for line in sys.stdin:
    match = index_re.search(line.strip())

    if match is None:
        # The line does not look like an access-log entry; count it and move on.
        results_errors['parse'] += 1
        continue

    b_url, u_code, u_bytes_sent, _u_request_length, u_time = match.groups()
    u_code = int(u_code)
    u_bytes_sent = int(u_bytes_sent)

    # Requests that map to no known meter (including lines where group 1 did
    # not participate in the match) are filed under 'others'.
    u_url = urls_meters.get(b_url, 'others')

    # Timings are kept as the original strings; they are only joined and
    # printed later, never used arithmetically here.
    if u_url in timings_urls_all:
        results_timings_urls_all[u_url].append(u_time)
    elif u_url in timings_urls_lt1mb and u_bytes_sent < 1048576:
        # NOTE(review): the size filter uses bytes *sent*, not the request
        # length -- confirm this is intended for the upload (PUT/POST) meters.
        results_timings_urls_lt1mb[u_url].append(u_time)

    if u_url in count_urls_codes:
        results_count_urls_codes[u_url][u_code] += 1





def _print_timings(template, timings_by_meter):
    """Print one line per meter that has collected timings, in meter-name order.

    `template` takes three %s slots: metric prefix, meter name, and the
    space-separated timing values.
    """
    for meter_name in sorted(timings_by_meter):
        samples = timings_by_meter[meter_name]
        if not samples:
            continue
        print(template % (s_name, meter_name, ' '.join(samples)))


# Request counts, one line per (meter, response code) pair.
for meter_name in sorted(results_count_urls_codes):
    per_code = results_count_urls_codes[meter_name]
    for code in sorted(per_code):
        print("%s_count_request_%s_code_%s %d" % (s_name, meter_name, code, per_code[code]))


# Timings: size-limited meters first, then the unconditional ones.
_print_timings("%s_timings_request_%s_lt1mb %s", results_timings_urls_lt1mb)
_print_timings("%s_timings_request_%s %s", results_timings_urls_all)


# Error counters, one line per error kind.
for error_kind in sorted(results_errors):
    print("%s_count_access_error_%s %d" % (s_name, error_kind, results_errors[error_kind]))


sys.exit(0)

