#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, re
sys.path.insert(0, 'WORKING_DIR')
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import time, strptime, mktime
from collections import defaultdict
from socket import gethostname
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Prefix of every metric name emitted by this script.
KEY_PREFIX = "so.compl"

# Message timestamps are rounded down to a multiple of this many seconds
# before being used as the statistics bucket key.
PARSE_PERIOD = 600

# Log file to follow: the first command-line argument when one is given,
# otherwise a per-host default path.
if len(sys.argv) > 1:
    LOGFILE = sys.argv[1]
else:
    LOGFILE = '/u0/{}/logs/compl.log'.format(gethostname())

# Error log path handed to LogTail.
ERRORLOG = "LOGS_DIR/compllog-parser-errors.log"

# Maps the one-letter type column of a record to the name used in metric keys.
check_types = dict(S='spam', H='ham')

def initData():
    """Rebind the module-level ``stats`` to a fresh, empty accumulator.

    The accumulator is a two-level mapping ``stats[key][name] -> int`` in
    which missing keys and missing names are created on first access, the
    latter starting at 0.
    """
    global stats

    def _new_counters():
        # Inner level: counter name -> count, absent names read as 0.
        return defaultdict(int)

    stats = defaultdict(_new_counters)

def send_plotnik(stats):
    """Print the collected counters to stdout, one metric per line.

    Each line has the form ``<name> <count as float> <timestamp>``. Lines are
    ordered by timestamp and, within one timestamp, by metric name.

    :param stats: mapping ``timestamp -> {metric name -> count}``.

    The whole report is written with a single ``print``, so the output ends
    with one extra newline after the last metric line; an empty ``stats``
    therefore produces a single empty line.
    """
    lines = []
    for timestamp, info in sorted(stats.items()):
        for name, count in sorted(info.items()):
            lines.append("%s %f %d\n" % (name, float(count), timestamp))
    # Joined once instead of growing a string with "+=" in the loop.
    # print(...) with one parenthesised argument is valid both as the
    # Python 2 print statement and as the Python 3 print function, and
    # emits the same bytes in either case.
    print("".join(lines))

def gotSignal(signum, frame):
    """Signal handler: terminate the process with exit status 1.

    :param signum: number of the delivered signal (not used).
    :param frame: interrupted stack frame (not used).
    :raises SystemExit: always, with code 1.
    """
    raise SystemExit(1)

# SIGCHLD is ignored; each of the other listed signals is routed to
# gotSignal, which exits the process with status 1.
signal(SIGCHLD, SIG_IGN)
for _signum in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_signum, gotSignal)


# Main loop: follow the complaint log, aggregate per-type counters into
# ``stats`` (bucketed by message time rounded down to PARSE_PERIOD) and print
# them through send_plotnik() at most once a minute, plus once more when the
# record iterator is exhausted.

# Date layout of the timestamp columns, and a cheap pre-check for it.
# The year is four digits so that the pre-check agrees with "%Y".
_DATE_FORMAT = "%d.%m.%Y %H:%M:%S"
_DATE_RE = re.compile(r'\d\d\.\d\d\.\d{4} \d\d:\d\d:\d\d')

# A record whose complaint codes are all drawn from this set (or that has no
# codes at all) is counted as NOTYPE instead of per code.
_NON_TYPE_CODES = frozenset(['AU', 'SN', 'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9'])

initData()
tail, lastSendTime = '', time()
LOG_TAIL = LogTail(None, log=LOGFILE, id="iex-proxy-compllog", errorlog=ERRORLOG)
for record in LOG_TAIL():
    # string example
    # notification+kjdikmj1pv7d@facebookmail.com      notification+kjdikmj1pv7d@facebookmail.com      S       09.08.2018 21:37:33     19.08.2018 23:59:57     _Hbounce.3ec1050000a0a0140140a83d@mta3.directcrm.ru       info@e.kari.com S       09.07.2020 11:05:41     05.08.2020 22:19:29     _HD_DP_PS_AU_SP_F5_DL_DR        shilova-18055   <3ec1050000a0a0140140a83d@mta3.directcrm.ru>    178.57.72.81    mta3.directcrm.ru       RU NFM Naro-Fominsk     377654582       n5Zgz7Y2Vb-5fdmaoDA     kari KIDS       Ирина, Вам понравится. Откройте!😉👉    ham
    #
    # Columns used below (tab separated, 0-based):
    #   2 - one-letter type, looked up in check_types
    #   3 - message date, used as the statistics bucket
    #   4 - a date, normalised to a Unix timestamp in place
    #   5 - "_"-separated complaint codes
    valid, fields = False, []
    try:
        # A row that starts with a non-word character and contains no
        # whitespace at all cannot be a record: report and skip it.
        if re.match(r'^\W', record) and not re.search(r'\s+', record):
            LOG_TAIL.error('Unknown row format for row: %s' % record)
            continue
        fields = record.split("\t")
        if len(fields) > 5:
            # Normalise column 4; anything that does not look like a date is
            # replaced by the current time.
            # NOTE(review): the log messages call column 4 the "message's
            # date" and the fallback column 5 the "complaint's date", while
            # the statistics below read the message date from column 3 and
            # the complaint codes from column 5 - confirm the intended
            # columns. The normalised value is not read by the code below.
            try:
                fields[4] = int(mktime(strptime(fields[4], _DATE_FORMAT))) if _DATE_RE.match(fields[4]) else time()
            except Exception as date_err:
                LOG_TAIL.error("Failed to parse message's date: '%s'. Record: %s" % (str(date_err), record), True)
                try:
                    fields[4] = int(mktime(strptime(fields[5], _DATE_FORMAT))) if _DATE_RE.match(fields[5]) else time()
                except Exception as fallback_err:
                    # The message takes exactly two values; a third "%s" here
                    # used to raise TypeError from inside this handler.
                    LOG_TAIL.error("Failed to parse complaint's date: '%s'. Record: %s" % (str(fallback_err), record), True)
                    # Keep column 4 numeric even when both attempts failed.
                    fields[4] = time()
            if re.match(r'^[A-Z]$', fields[2]):
                if tail:
                    # Text collected from preceding non-record lines is split
                    # on tabs and appended to this record's columns.
                    fields.extend(tail.split("\t"))
                    tail = ''
                valid = True
        else:
            LOG_TAIL.error("Strange line: %s" % record)
        if not valid:
            # Not a record on its own: remember the text and move on.
            tail += ' ' + record.strip()
            continue
    except Exception as check_err:
        # No "continue" here: the record still goes through the checks below.
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(check_err), record), True)
    if len(fields) < 5:
        continue
    try:
        if fields[2] in check_types:
            tp = check_types[fields[2]]
        else:
            tp = 'unknown'
            LOG_TAIL.error('NotValidType for row: %s' % record)
        # Bucket key: message time rounded down to a multiple of PARSE_PERIOD.
        msg_time = int(mktime(strptime("%s" % fields[3], _DATE_FORMAT)) / PARSE_PERIOD) * PARSE_PERIOD
        # Empty pieces produced by leading/doubled "_" are dropped.
        compltypes = set(code for code in fields[5].split("_") if code)
        bucket = stats[msg_time]
        prefix = "%s.%s.type." % (KEY_PREFIX, tp)
        if "XX" in compltypes:
            # XX records are counted only as XX (and IC when present); they
            # do not contribute to TOTAL or to any other code.
            if "IC" in compltypes:
                bucket[prefix + "IC"] += 1
            bucket[prefix + "XX"] += 1
            continue
        bucket[prefix + "TOTAL"] += 1
        if len(compltypes) < 1 or len(compltypes - _NON_TYPE_CODES) < 1:
            bucket[prefix + "NOTYPE"] += 1
            continue
        for compltype in compltypes:
            bucket[prefix + compltype] += 1
        # Combination counters: presence (1) / absence (0) of WS or WH
        # together with IM.
        WS = "WS" in compltypes
        WH = "WH" in compltypes
        IM = "IM" in compltypes
        bucket[prefix + "WS%d_IM%d" % (WS, IM)] += 1
        bucket[prefix + "WH%d_IM%d" % (WH, IM)] += 1
    except Exception as parse_err:
        LOG_TAIL.error("Error while parsing compl.log: %s" % str(parse_err), True)
    # Flush at most once per 60 seconds: hand the collected counters to
    # send_plotnik() and start a fresh accumulator.
    if time() - lastSendTime > 60:
        stats2 = stats.copy()
        initData()
        send_plotnik(stats2)
        lastSendTime = time()

# Final flush of whatever is left once the record iterator ends.
send_plotnik(stats)
