import sys
import re
from base64 import b64decode
import gzip
import io


import dateutil.parser


import click


# One log record per input line: timestamp, hostname, pid, service name and
# the thread-dump payload, separated by whitespace.
# example:
# 2019-11-11:18:35:16 ppcdev6.yandex.ru 971128 java-jobs H4sIALR/yV0AA+ydW3PcNpaA...==
# Capture groups: 1 = date, 2 = time (the two may be joined by ':' or ' '),
# 3 = hostname, 4 = pid, 5 = service, 6 = payload (base64 text that
# parse_line decodes and gunzips).
LINE_RE = re.compile(r'^(\d\d\d\d-\d\d-\d\d)[: ](\d\d:\d\d:\d\d)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)$')
# Matches only the leading timestamp (groups 1 = date, 2 = time), so a record
# can be filtered by time without looking at the rest of the line.
DATETIME_RE = re.compile(r'^(\d\d\d\d-\d\d-\d\d)[: ](\d\d:\d\d:\d\d)\s+')


def validate_datetime(ctx, param, value):
    """Click option callback: turn a datetime string into a datetime object.

    :param ctx: click context (unused, required by the callback signature)
    :param param: click parameter being validated (unused)
    :param value: raw option value, or None when the option was not given
    :return: None if ``value`` is None, otherwise the result of
        ``dateutil.parser.parse(value)``
    :raises click.BadParameter: if dateutil cannot parse ``value``
    """
    if value is None:
        return None
    try:
        return dateutil.parser.parse(value)
    except (ValueError, OverflowError):
        # dateutil raises OverflowError (not ValueError) when the parsed
        # numbers are too large; report that as a bad option value too
        # instead of letting a traceback escape.
        raise click.BadParameter("Incorrect datetime format, see dateutil.parser documentation")


@click.command('thread-dumps', help="Tool for filtering java thread-dumps logs (in base64/gzip format)", context_settings=dict(help_option_names=['-h', '--help']))
@click.option('--from', '-f', 'from_datetime', default=None, callback=validate_datetime,
              help="from datetime")
@click.option('--to', '-t', 'to_datetime', default=None, callback=validate_datetime,
              help="to datetime")
@click.option('--thread-name', '-T', 'thread_filter', default=None,
              help="regexp for filtering threads by name")
@click.option('--stack-trace', '-S', 'stack_filter', default=None,
              help="regexp for filtering stacktraces")
@click.option('--service', 'service_filter', default=None,
              help="regexp for filtering by services")
@click.option('--count', '-n', default=None, type=int,
              help="count of dumps to print")
def cli(from_datetime, to_datetime, thread_filter, stack_filter, service_filter, count):
    """Read log records from stdin and print the thread dumps that pass the filters.

    :param from_datetime: skip records older than this datetime (None = no bound)
    :param to_datetime: skip records newer than this datetime (None = no bound)
    :param thread_filter: regexp that must occur inside the quoted thread name
        that starts a thread section (None = every thread)
    :param stack_filter: regexp searched in the whole thread section
        (None = every thread)
    :param service_filter: regexp searched in the record's service name
        (None = every service)
    :param count: stop after this many records have been printed
        (None = no limit)
    :raises click.BadParameter: if one of the regexps does not compile
    """
    thread_filter_rx = None
    if thread_filter is not None:
        # Anchor the user's pattern inside the leading "thread name" quotes.
        thread_filter_rx = _compile_filter('^"[^"]*' + thread_filter + '[^"]*".*', '--thread-name')
    stack_filter_rx = _compile_filter(stack_filter, '--stack-trace')
    service_filter_rx = _compile_filter(service_filter, '--service')

    found_count = 0
    for line in sys.stdin:
        if count is not None and found_count >= count:
            break

        # Cheap timestamp-only parse first, so that records outside the
        # requested window are skipped without decoding their payload.
        dt = parse_datetime(line)
        if dt is None:
            print("Can't parse line " + line)
            continue

        if from_datetime is not None and not dt >= from_datetime:
            continue
        if to_datetime is not None and not dt <= to_datetime:
            continue

        log_line = parse_line(line)
        if log_line is None:
            # The timestamp matched but the rest of the record did not;
            # parse_line signals that with None.
            print("Can't parse line " + line)
            continue

        if service_filter_rx is not None and not service_filter_rx.search(log_line.service):
            continue

        # Single-argument print(...) calls behave the same on Python 2 and 3;
        # print("") is used for blank lines because print() would output
        # "()" under Python 2.
        print(str(log_line.dt) + " " + log_line.service)
        print("")

        for th in log_line.thread_dumps:
            if thread_filter_rx is not None and not thread_filter_rx.match(th):
                continue
            if stack_filter_rx is not None and not stack_filter_rx.search(th):
                continue
            print(th)
            print("")

        found_count += 1


def _compile_filter(pattern, option_name):
    """Compile a user-supplied regexp; None stays None, a bad regexp becomes a click.BadParameter."""
    if pattern is None:
        return None
    try:
        return re.compile(pattern)
    except re.error as exc:
        raise click.BadParameter("Invalid regexp: " + str(exc), param_hint=option_name)


def parse_datetime(line):
    """Extract the timestamp that starts a log line.

    :param line: one raw log line
    :return: the parsed datetime, or None when the line does not start with
        a timestamp or the timestamp is not a valid date/time
    """
    m = DATETIME_RE.match(line)
    if m is None:
        return None
    try:
        return dateutil.parser.parse(m.group(1) + " " + m.group(2))
    except ValueError:
        # DATETIME_RE only checks the digit layout, so values such as
        # month 13 or hour 99 reach dateutil and are rejected there.
        # Treat them like any other unparseable line.
        return None


def parse_line(line):
    """Parse one complete log record.

    :param line: one raw log line
    :return: a LogLine built from the record's fields, with the payload
        base64-decoded, gunzipped and split into thread sections;
        None when the line does not match LINE_RE
    """
    matched = LINE_RE.match(line)
    if not matched:
        return None

    date_part, time_part, hostname, pid, service, payload = matched.groups()
    timestamp = dateutil.parser.parse(date_part + " " + time_part)
    dump_text = gunzip(b64decode(payload))
    return LogLine(timestamp, hostname, pid, service, parse_dump(dump_text))


def parse_dump(data):
    """Split a decoded dump into its per-thread sections.

    Sections are separated by an empty line; only those starting with a
    double quote are kept, every other section is dropped.

    :param data: full text of one dump
    :return: list of the kept sections, in their original order
    """
    thread_sections = []
    for section in data.split('\n\n'):
        if section.startswith('"'):
            thread_sections.append(section)
    return thread_sections


class LogLine(object):
    """One parsed log record: where and when a dump was taken, plus its threads."""

    def __init__(self, dt, hostname, pid, service, thread_dumps):
        """Store the record fields as plain attributes.

        :param dt: timestamp of the record
        :param hostname: host field of the record
        :param pid: pid field of the record
        :param service: service name field of the record
        :param thread_dumps: per-thread sections of the dump
        """
        # Where and when the dump was taken.
        self.dt, self.hostname, self.pid, self.service = dt, hostname, pid, service
        # The dump itself, one entry per thread.
        self.thread_dumps = thread_dumps


def gunzip(data):
    """Decompress a gzip-compressed byte string and return it as text.

    :param data: gzip-compressed bytes
    :return: the decompressed content decoded as UTF-8
    """
    with gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb') as fo:
        gunzipped_bytes_obj = fo.read()

    # Name the codec explicitly: a bare .decode() means ASCII on Python 2
    # and would fail on any non-ASCII character in the dump.
    return gunzipped_bytes_obj.decode('utf-8')
