#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""upper access_log frontend"""

from __future__ import print_function
from __future__ import unicode_literals

import sys
import re

# Pattern for a single access_log line, matched from the start of the line.
# The quoted fields referrer, user_agent, forwarded_for and cookie accept
# backslash-escaped characters, so an escaped quote does not end the field.
# The alternation inside those fields is a non-capturing group `(?:...)`.
# The earlier spelling `(:?...)` was a capturing group with an optional
# leading colon: it accepted the same text but could split a run of colons
# in many ways, which made non-matching lines backtrack exponentially.
RE = re.compile(r'^(?P<remote_ip>\S*) \S* \S* \[(?P<datetime>\S*) (?P<timezone>\S*)\] '
    r'"(?P<method>\S*) (?P<url>\S*) (?P<http_version>\S*)" '
    r'(?P<status_code>\S*) (?P<response_size>\S*) '
    r'"(?P<referrer>(?:[^\\"]|\\.)*)" "(?P<user_agent>(?:[^\\"]|\\.)*)" '
    r'"(?P<host>[^,]*),(?P<source_port>(\d*|-))" '
    r'"(?P<forwarded_for>(?:[^\\"]|\\.)*)" "(?P<cookie>(?:[^\\"]|\\.)*)" '
    r'(?P<timestamp>\S*) "(?P<response_time_s>\d*)" (?P<response_time_us>\d*) \d* '
    r'(?P<reqid>\S*) (?P<pid>\d*) (?P<balancer>\S*) '
    r'\S* \S* \S* \S* \S* (?P<suspected_robot>\S*) (?P<internal_request>\S*)')

def parse_line(line):
    """Return the named groups of RE for *line* as a dict, or None on no match."""
    matched = RE.match(line)
    return matched.groupdict() if matched else None


# Output columns, in the order they are printed by parse().
FIELDS = [
    "remote_ip",
    "datetime",
    "method",
    "status_code",
    "response_size",
    "response_time_s",
    "response_time_us",
    "url",
    "host",
    "balancer",
    "reqid",
    "referrer",
    "user_agent_safe",
    "suspected_robot",
    "internal_request",
]

def parse(input):
    """Print the log lines from *input* as tab-separated rows on stdout.

    A header row is printed first; each column is labelled ``name(index)``
    in the order given by FIELDS.  Every line is then passed to
    parse_line(): lines that parse are printed as one tab-separated row,
    lines that do not are reported on stderr and skipped.

    input -- iterable yielding log lines (an open file, sys.stdin, a list).
    """
    print('\t'.join("{}({})".format(name, idx) for idx, name in enumerate(FIELDS)))
    row_fmt = '\t'.join("{%s}" % name for name in FIELDS)
    for line in input:
        record = parse_line(line)
        if not record:
            # Strip the line terminator: print() adds its own newline, and
            # keeping both would leave an empty line after every error.
            print("ERROR: Log parse error: {}".format(line.rstrip('\r\n')),
                  file=sys.stderr)
            continue
        if 'user_agent' in record:
            # Spaces are replaced with underscores in the printed user agent.
            record['user_agent_safe'] = record['user_agent'].replace(' ', '_')
        print(row_fmt.format(**record))


def main():
    """Parse the file named by the first command-line argument, else stdin.

    When a path is given the file is opened in a ``with`` block so it is
    closed once parsing finishes, including when parse() raises.
    """
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as f:
            parse(f)
    else:
        parse(sys.stdin)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
