#!/usr/bin/env python3.5
"""
Parse logs from stdin supplied by push-client and convert them to tskv format.
"""

import argparse
import logging
import re
import socket
import sys
import time
from datetime import datetime, timedelta, timezone
from os import environ

# Value of the 'origin' field attached to every emitted record.
ORIGIN = 'clickhouse'
# Matches the first line of a log record, which has the shape
#   <date> <time> [ <thread> ] <<severity>> <component>: <message>
# Both fragments are raw strings so that '\S' and '\s' reach the regex engine
# untouched instead of being treated as (invalid) string escape sequences.
RECORD_RE = re.compile(
    r'^(?P<datetime>\S+ \S+)\s+\[ (?P<thread>\S+) \]\s+'
    r'<(?P<severity>\S+)>\s+(?P<component>[^:]*):\s+(?P<message>\S.*)')
# strptime format of the datetime found in the input lines.
IN_DATE_FORMAT = '%Y.%m.%d %H:%M:%S.%f'
# strftime format of the 'log_time' output field.
OUT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S.%f %z'
# Fixed-offset local timezone, evaluated once at import time.
# time.daylight only tells whether the zone defines DST at all, so the
# decision between altzone and timezone is based on whether DST is actually in
# effect right now (tm_isdst > 0).
LOCAL_TZ = timezone(
    timedelta(seconds=-1 * (
        time.altzone if time.localtime().tm_isdst > 0 else time.timezone)))


def main():
    """
    Command-line entry point.

    Reads log lines from stdin, groups them into records and prints each
    record in tskv format, enriched with cluster, hostname, origin and
    log_format fields. A record that cannot be handled is logged and skipped.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-c', '--cluster', type=str, default='',
                            help='Cluster name')
    cluster = arg_parser.parse_args().cluster

    logging.basicConfig(format='%(asctime)s %(message)s')

    # Fields that are the same for every record produced by this process.
    common_fields = dict(
        cluster=cluster,
        hostname=socket.getfqdn(),
        origin=ORIGIN,
        log_format=environ.get('LOG_ENVIRONMENT', 'dbaas_int_log'),
    )

    for entry in parse_records(sys.stdin, RECORD_RE):
        try:
            entry['data'].update(common_fields)
            process_datetime(entry, IN_DATE_FORMAT, OUT_DATE_FORMAT)
            print_record(entry)
        except Exception:
            # Keep the stream going: one bad record must not stop the filter.
            logging.exception('Failed to handle log record %s', entry)


def parse_records(source, regex, message_key='message'):
    """
    Generate log records from the lines of file-like object 'source'.

    Every line is expected to start with a '<n>;<n>;<n>;' line id. A line
    whose remainder matches 'regex' opens a new record built from the named
    groups of the pattern. Any other line is a continuation: it is appended to
    the 'message_key' field of the record being assembled, separated by a
    literal backslash-n, and is dropped if no record has been started yet.

    Each yielded record is a dict with 'key' (id of the last line merged into
    the record) and 'data' (the field dict). A record is yielded when the next
    one starts or when the input ends. Lines that cannot be processed are
    logged and skipped.
    """
    id_prefix = re.compile(r'^(\d+;\d+;\d+;)')
    pending = None

    for raw_line in source:
        try:
            # Without an id prefix the split yields a single item and the
            # unpacking raises ValueError, which is logged below.
            line_id, body = id_prefix.split(raw_line, 1)[1:]
            found = re.match(regex, body)
            if found is None:
                if pending is not None:
                    pending['key'] = line_id
                    pending['data'][message_key] += r'\n' + body.strip('\n')
                continue
            if pending is not None:
                yield pending
            pending = dict(key=line_id, data=found.groupdict())
        except Exception:
            logging.exception('Failed to parse log line: %s', raw_line)

    # Flush the record that was still being assembled at end of input.
    if pending is not None:
        yield pending


def process_datetime(record, in_format, out_format, datetime_key='datetime'):
    """
    Parse the record's datetime and add time-related fields to the record.

    Reads record['data'][datetime_key] and parses it with 'in_format'. When
    the parsed value carries no timezone, LOCAL_TZ is assumed. The following
    fields are then added to record['data']:

    - 'timestamp': whole seconds since the epoch (int);
    - 'ms': millisecond part of the time, as a decimal string;
    - 'log_time': the datetime rendered with 'out_format'.

    Raises KeyError if the datetime field is missing and ValueError if its
    value does not match 'in_format'.
    """
    dt = datetime.strptime(record['data'][datetime_key], in_format)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=LOCAL_TZ)
    record['data'].update({
        'timestamp': int(dt.timestamp()),
        # Milliseconds come from the exact integer microsecond field. Deriving
        # them from the float timestamp (timestamp % 1 * 1000) loses precision
        # and truncates e.g. .123 down to 122.
        'ms': '%d' % (dt.microsecond // 1000),
        'log_time': dt.strftime(out_format),
    })


def print_record(record):
    """
    Write one record to stdout as a single tskv line.

    The line consists of the record's line id ('key'), the 'tskv' marker and
    the tab-separated 'name=value' pairs of record['data'], with every value
    converted to str and quoted. Output is flushed after each record.
    """
    pairs = []
    for name, value in record['data'].items():
        pairs.append('%s=%s' % (name, tskv_quote(str(value))))

    line = '{line_id}tskv\t{tskv}'.format(
        line_id=record['key'],
        tskv='\t'.join(pairs))
    print(line, flush=True)


def tskv_quote(string):
    """
    Make a string safe to use as a tskv value.

    Tabs are replaced with spaces, '=' becomes backslash-'=' and newlines
    become a literal backslash-n. All other characters, backslashes included,
    are left untouched.
    """
    without_tabs = string.replace('\t', ' ')
    escaped_equals = without_tabs.replace('=', r'\=')
    return escaped_equals.replace('\n', r'\n')


# Run the converter only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
