from collections import defaultdict
import itertools
import os
import re


def get_statistics(request_log, ignore_handlers=None):
    """
    Collect per-handler traffic statistics from a phantom request.log
    written in logger_default_t format:
    https://a.yandex-team.ru/arc/trunk/arcadia/contrib/phantom/phantom/io_stream/proto_http/logger_default.C?rev=r7838523#L29

    Every record is a tab-separated header line followed by the raw request,
    for example (<TAB> stands for a tab character):

        124<TAB>2021-02-08 15:01:18.019 +0300<TAB>::1:36780<TAB>523<TAB>29<TAB>180
        GET /ping HTTP/1.1
        Accept-Encoding: identity
        Host: yabstat.yandex.ru
        Connection: close
        User-Agent: Python-urllib/2.7

    The first header field is read as the request size in bytes and the
    second-to-last field as the response size. The handler is the first path
    segment of the request target ("/ping" -> "ping", "/a/b" -> "a"); a query
    string, if any, is not stripped from it.

    :param request_log: path to the log file
    :param ignore_handlers: handlers that must not be accounted
        (nothing is ignored by default)
    :return: defaultdict. key: handler (native str),
        value: {request_size: int, response_size: int, count: int}
    :raises ValueError: if a size field of a header is not an integer
    :raises IndexError: if a header has fewer than two fields or the request
        line has no request target
    """
    if ignore_handlers is None:
        ignore_handlers = []

    result = defaultdict(lambda: {
        'request_size': 0,
        'response_size': 0,
        'count': 0,
    })
    # Binary mode is required: sizes in the headers are byte counts and the
    # request body is skipped with a relative seek.
    with open(request_log, 'rb') as f:
        while True:
            # Skip blank lines separating records; an empty read means EOF.
            hdr_str = f.readline()
            while hdr_str and not hdr_str.strip():
                hdr_str = f.readline()
            if not hdr_str:
                return result

            # Bytes separators: the file yields bytes on Python 3
            # (on Python 2 a bytes literal is a plain str).
            hdr = hdr_str.split(b'\t')
            request_size = int(hdr[0])
            response_size = int(hdr[-2])

            request_line = f.readline()
            handler = request_line.split()[1][1:].split(b'/')[0]
            if not isinstance(handler, str):
                # Python 3: expose str keys, same as Python 2 callers get.
                handler = handler.decode('utf-8', 'replace')

            if handler not in ignore_handlers:
                t = result[handler]
                t['request_size'] += request_size
                t['response_size'] += response_size
                t['count'] += 1

            # The request line is already consumed; jump over the rest of
            # the request to land right after it.
            f.seek(request_size - len(request_line), os.SEEK_CUR)


def iterate(log_fileobj, header_separator='\t', single_hdr_tail=False):
    """
    Iterate over phantom request.log / phantom_benchmark ammo records
    (the two formats differ in header_separator).

    Each record is a header line whose first field is the request size,
    followed by the request itself. Blank lines between records are skipped.
    Iteration ends silently at end of file or when the last request is
    shorter than its header announces (truncated log).

    :param log_fileobj: file-like object providing readline() and read(size)
    :param header_separator: regular expression splitting header fields
    :param single_hdr_tail: if true, the header is split only once and
        everything after the size is yielded as one string ('' when absent)
    :return: generator of tuples (request, header_tail) where header_tail is
        the list of header fields after the size, or a single string when
        single_hdr_tail is set
    :raises ValueError: if the first header field is not an integer
    """
    split_pattern = re.compile(header_separator)
    # For re.split maxsplit=0 means "no limit".
    max_split = 1 if single_hdr_tail else 0
    while True:
        # Skip blank separator lines; an empty read means EOF.
        hdr_str = log_fileobj.readline()
        while hdr_str and not hdr_str.strip():
            hdr_str = log_fileobj.readline()
        if not hdr_str:
            # A plain return ends the generator. Raising StopIteration here
            # is turned into RuntimeError on Python 3.7+ (PEP 479).
            return

        hdr = split_pattern.split(hdr_str.rstrip('\n'), maxsplit=max_split)
        try:
            size = int(hdr[0])
        except ValueError as exc:
            raise ValueError('Bad header line "{}": {}'.format(hdr, exc))

        request = log_fileobj.read(size)
        if len(request) != size:
            # Truncated final record: stop instead of yielding a partial one.
            return

        if single_hdr_tail:
            yield request, (hdr[1:] or [''])[0]
        else:
            yield request, hdr[1:]


def write(logfile, request, hdr_tail):
    """
    Write request in logger_default_t format.

    Emits a tab-separated header line (request length followed by the
    hdr_tail fields), then the request itself followed by a newline.

    :param logfile: file-like object with a write() method
    :param request: request text; its len() goes into the header
    :param hdr_tail: iterable of strings appended to the header after the size
    """
    # file.write instead of the Python 2-only "print >>file" statement,
    # which fails at runtime on Python 3; the output is the same.
    header = '\t'.join(itertools.chain([str(len(request))], hdr_tail))
    logfile.write(header + '\n')
    logfile.write(request)
    logfile.write('\n')


def write_stpd(logfile, request, timestamp, tag=''):
    """
    Write request in io_benchmark source log format (aka stpd).

    Emits a space-separated header line "<len(request)> <timestamp> <tag>"
    (with an empty tag the line ends with a space), then the request itself
    followed by a newline.

    :param logfile: file-like object with a write() method
    :param request: request text; its len() goes into the header
    :param timestamp: value converted with str() for the header
    :param tag: optional tag string placed last in the header
    """
    # file.write instead of the Python 2-only "print >>file" statement,
    # which fails at runtime on Python 3; the output is the same.
    header = ' '.join((str(len(request)), str(timestamp), tag))
    logfile.write(header + '\n')
    logfile.write(request)
    logfile.write('\n')
