#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, json, re, ConfigParser
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import time, strptime, mktime
from subprocess import Popen, PIPE
from collections import defaultdict
from socket import getfqdn
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Default prefix of the metric names accumulated in `stats`
# ("<prefix>.<spam|ham|unknown>.type.<TYPE>"); replaced further down by the
# `graphitePrefix` option of the [general] config section when it is set.
KEY_PREFIX = "so.compl"
# Host name: one or more dot-terminated labels (alphanumeric with inner
# hyphens, at most 63 characters, or an "xn--" label) followed by a final
# label that is either "xn--..." or letters only. Case-insensitive.
DOMAIN_RE = re.compile(r'(?:(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?|xn\-\-[a-z0-9-]+)\.)+(?:xn\-\-[a-z0-9-]+|[a-z]+)', re.I)
# E-mail address: a local part made of word characters, "+", "-", "." and "=",
# then "@" and a host name as described by DOMAIN_RE. The pattern carries no
# anchors of its own.
EMAIL_RE = re.compile(r'[\w\+\-\.=]+\@{0}'.format(DOMAIN_RE.pattern), re.I)
# Message-Id: letters, digits, "_", "@", "." and "-", optionally wrapped in
# angle brackets.
MSGID_RE = re.compile(r'<?[a-zA-Z0-9_\@\.-]+>?')

# NOTE: a `global` statement at module scope has no effect; it only lists the
# names this script shares between its top-level code and its functions.
global cfg, q, check_types, route, svc, prefx, data, stats, LOG_TAIL

# Record type letter (third tab-separated log field) -> metric name component.
check_types = dict(S='spam', H='ham')

cfg = ConfigParser.SafeConfigParser()
# Keep option names case-sensitive (the default transform lower-cases them).
cfg.optionxform = str

group = 'compl'
route = group.upper()
prefx = route
svc = "compllog"

# The config path is the first command-line argument, if one was given.
if len(sys.argv) > 1:
    cfg_file = sys.argv[1]
else:
    cfg_file = 'WORKING_DIR/so-compllog-parser.ini'
cfg.read(cfg_file)

# `q` is the payload that send_monstats() serialises to JSON: every option of
# the [general] section plus the fixed entries below, which override any
# option of the same name.
q = dict(cfg.items('general'))
for _key, _value in (
    ('dataResult', {}),
    ('log', 'compllog'),
    ('logData', {}),
    ('machine', getfqdn()),
    ('prefix', route),
):
    q[_key] = _value

if cfg.has_option('general', 'graphitePrefix'):
    KEY_PREFIX = cfg.get('general', 'graphitePrefix')

def initData():
    """Rebind the module-level accumulators to fresh, empty containers.

    After the call:
      * ``stats[key1][key2]`` is an int counter (two levels);
      * ``data[key1][key2][key3]`` is an int counter (three levels).
    Missing keys are created on first access at every level.
    """
    global stats, data

    def _counters():
        # Innermost level: name -> int, starting at 0.
        return defaultdict(int)

    def _counter_table():
        # Middle level of `data`: key -> counters.
        return defaultdict(_counters)

    stats = defaultdict(_counters)
    data = defaultdict(_counter_table)

def send_monstats(data):
    """Append `data` to ``q['dataResult']`` and pipe ``q`` as JSON to the sender script.

    `data` maps a section name to ``{timestamp: counters}``. Each counters
    mapping gets a ``'time'`` entry set to its timestamp (the mapping passed
    in is modified in place) and a plain-dict copy of it is appended to the
    list stored under the section name in ``q['dataResult']``.

    Anything the sender writes to stderr, and any exception raised while
    running it, is reported through ``LOG_TAIL.error``; nothing is raised to
    the caller from that step.
    """
    for section in data:
        rows = q['dataResult'].setdefault(section, [])
        for stamp, counters in data[section].items():
            counters['time'] = stamp
            rows.append(dict(counters))
    try:
        sender = Popen(['WORKING_DIR/so_logdata_sender.py'],
                       stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
        output, err = sender.communicate(input=json.dumps(q))
        if err:
            LOG_TAIL.error("Error while sending parsed data: %s. Output: %s" % (err, output))
    except Exception as e:
        LOG_TAIL.error("Exception while sending parsed data: %s" % str(e), True)

def send_plotnik(stats):
    """Write the accumulated counters to stdout, one metric per line.

    `stats` maps an integer timestamp to ``{metric_name: count}``. Each
    counter is written as ``"<name> <count as %f> <timestamp>"``; lines are
    ordered by timestamp, then by metric name. The output always ends with
    one extra newline, so an empty `stats` produces a single blank line.
    """
    lines = []
    for timestamp, info in sorted(stats.items()):
        for name, count in sorted(info.items()):
            lines.append("%s %f %d\n" % (name, float(count), timestamp))
    # One write of the joined buffer plus the trailing newline the former
    # `print` statement appended; avoids repeated string concatenation.
    sys.stdout.write("".join(lines) + "\n")

def gotSignal(signum, frame):
    """Signal handler: send any accumulated `data`, then exit with status 1.

    `signum` and `frame` are the standard handler arguments and are not used.
    """
    if data:
        # Something was collected since the last reset; send it before exiting.
        send_monstats(data)
    sys.exit(1)

# The names below are not provided by the import block at the top of the file,
# so they are imported here; without this the first signal() call raises
# NameError.
from signal import (signal, SIG_IGN, SIGABRT, SIGCHLD, SIGHUP, SIGINT,
                    SIGPIPE, SIGQUIT, SIGTERM)

# SIGCHLD is ignored; each of the other signals runs gotSignal().
signal(SIGCHLD, SIG_IGN)
for _signum in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_signum, gotSignal)

initData()
tail, lastSendTime = '', time()
LOG_TAIL = LogTail(cfg_file)

# Complaint date as it appears in the log: "DD.MM.YYYY HH:MM:SS". The year has
# four digits, matching the "%Y" directive used when the value is parsed.
CMPL_DATE_RE = re.compile(r'^\d\d\.\d\d\.\d{4} \d\d:\d\d:\d\d$')
# A record whose flags (sixth field, "_"-separated) all belong to this set is
# counted as NOTYPE.
NON_TYPE_FLAGS = frozenset(['AU', 'SN', 'F0', 'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9'])

# Main loop: one tab-separated record per iteration.
# NOTE(review): every `continue` below also skips the periodic flush check at
# the bottom of the loop body - confirm that this is intended.
for record in LOG_TAIL():
    # string example
    # notification+kjdikmj1pv7d@facebookmail.com      notification+kjdikmj1pv7d@facebookmail.com      S       09.08.2018 21:37:33     19.08.2018 23:59:57     _HD_DL_ZT_DR    cansuyum34      1027284425      <3f45b36f0aa97f21dc49eb8e116686fb@2d4a6334d2f2bb73c1c106ccaf5b5e83c5a47abe274260dadc6a33286199d452>     66.220.144.144  66-220-144-144.mail-mail.facebook.com   2401:db00:2131:201a:face:0:37:0 -       US      -       0       513163189       bT8tNZQn5K-bYvuCcQJ     tr      ' Cemile Arslaner '     'Cemile Arslaner size bir mesaj g?nderdi.'      2
    # `et` collects the error flags raised for this record.
    valid, fields, et = False, [], {}
    try:
        if re.match(r'^\W', record) and not re.search(r'\s+', record):
            LOG_TAIL.error('Unknown row format for row: %s' % record)
            continue
        fields = record.split("\t")
        if len(fields) > 5:
            # Fifth field: complaint date. It is replaced by a Unix timestamp;
            # a missing or malformed value falls back to the current time and
            # raises the matching error flag.
            cmpl_date = fields[4].strip()
            if cmpl_date == '-':
                et['errorNoCmplDate'], t = 1, time()
            elif CMPL_DATE_RE.match(cmpl_date):
                try:
                    t = int(mktime(strptime(cmpl_date, "%d.%m.%Y %H:%M:%S")))
                except (ValueError, OverflowError):
                    # Right shape but not a real date (e.g. month 13).
                    et['errorNotValidCmplDate'], t = 1, time()
            else:
                et['errorNotValidCmplDate'], t = 1, time()
            fields[4] = t
            if re.match(r'^[A-Z]$', fields[2]):
                if tail:
                    # Text of previously rejected rows is appended to this
                    # record as extra trailing fields.
                    fields[len(fields):len(fields)] = tail.split("\t")
                    tail = ''
                valid = True
            else:
                et['errorNotValidType'] = 1
        else:
            LOG_TAIL.error("Strange line: %s" % record)
        if not valid:
            # Keep the rejected row; it is attached to the next valid record.
            tail += ' ' + record.strip()
            continue
    except Exception as e:
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
        et['errorNotValidCmplDate'] = 1
    if len(fields) < 5:
        continue
    try:
        # Per-minute bucket of the complaint time, used as the key in `data`.
        t = int(fields[4] / 60) * 60
        if fields[2] in check_types:
            tp = check_types[fields[2]]
        else:
            et['errorNotValidType'], tp = 1, 'unknown'
            LOG_TAIL.error('NotValidType for row: %s' % record)
        svc_types, svc_errors = "{0}_{1}_types".format(svc, tp), "{0}_{1}_errors".format(svc, tp)
        if fields[2] in check_types:
            data[svc_types][t][tp] += 1
        # Ten-minute bucket of the fourth field (a date), used as the key in `stats`.
        msg_time = int(mktime(strptime("%s" % fields[3], "%d.%m.%Y %H:%M:%S")) / 600) * 600
        compltypes = set(filter(len, fields[5].split("_")))
        if "XX" in compltypes:
            # XX records are counted only under XX (and IC when present).
            if "IC" in compltypes:
                stats[msg_time]["%s.%s.type.%s" % (KEY_PREFIX, tp, "IC")] += 1
            stats[msg_time]["%s.%s.type.%s" % (KEY_PREFIX, tp, "XX")] += 1
            continue
        stats[msg_time]["%s.%s.type.%s" % (KEY_PREFIX, tp, "TOTAL")] += 1
        if len(compltypes) < 1 or len(compltypes - NON_TYPE_FLAGS) < 1:
            stats[msg_time]["%s.%s.type.%s" % (KEY_PREFIX, tp, "NOTYPE")] += 1
            continue
        for compltype in compltypes:
            stats[msg_time]["%s.%s.type.%s" % (KEY_PREFIX, tp, compltype)] += 1
        WS = "WS" in compltypes
        WH = "WH" in compltypes
        IM = "IM" in compltypes
        # Presence combinations, e.g. "WS1_IM0" (booleans are formatted as 0/1).
        stats[msg_time]["%s.%s.type.WS%d_IM%d" % (KEY_PREFIX, tp, WS, IM)] += 1
        stats[msg_time]["%s.%s.type.WH%d_IM%d" % (KEY_PREFIX, tp, WH, IM)] += 1

        data[svc_types][t]['total'] += 1
        # Per-field validation; each failed check raises one flag in `et`.
        if fields[0].strip() == '-' and fields[1].strip() == '-':
            et['errorNoSender'] = 1
        else:
            if not EMAIL_RE.match(fields[0].strip()):
                et['errorNotValidSender'] = 1
            if not EMAIL_RE.match(fields[1].strip()):
                et['errorNotValidFrom'] = 1
        if fields[5].find('_') > -1:
            for t1 in fields[5].split('_'):
                if t1:
                    data[svc_types][t][t1] += 1
        else:
            et['errorNoTypes'] = 1
            data[svc_types][t]['notype'] += 1
        if not (fields[6] and fields[6] != '-'):
            et['errorNoLogin'] = 1
        if not (fields[7] and fields[7] != '-'):
            et['errorNoSUID'] = 1
        elif not re.match(r'^\d+$', fields[7]):
            et['errorNotValidSUID'] = 1
        if not (fields[8] and fields[8] != '-'):
            et['errorNoMessageId'] = 1
        elif not MSGID_RE.match(fields[8]):
            et['errorNotValidMessageId'] = 1
        if not (fields[9] and fields[9] != '-'):
            et['errorNoSenderIP'] = 1
        if not (fields[10] and fields[10] != '-'):
            et['errorNoSenderHost'] = 1
        if not (fields[11] and fields[11] != '-'):
            et['errorNo1stSenderIP'] = 1
        if not (fields[12] and fields[12] != '-'):
            et['errorNo1stSenderHost'] = 1
        if not (fields[13] and fields[13] != '-'):
            et['errorNoSenderGeo'] = 1
        if not (fields[14] and fields[14] != '-'):
            et['errorNo1stSenderGeo'] = 1
        # 22nd field: spam flag, given either as a number or as a word.
        if len(fields) > 21 and fields[21] and fields[21] != '-':
            if fields[21] == '4' or fields[21] == 'YES':
                data[svc_types][t]['SPAM'] += 1
            elif fields[21] == '1' or fields[21] == 'NO':
                data[svc_types][t]['HAM'] += 1
            elif fields[21] == '2' or fields[21] == 'DLVR':
                data[svc_types][t]['DLVR'] += 1
            elif fields[21] == '256':
                data[svc_types][t]['MALIC'] += 1
            else:
                data[svc_types][t]['UNKNOWN'] += 1
        else:
            et['errorNoSpamFlag'] = 1
        for t1 in et.keys():
            data[svc_errors][t][t1] += 1
    except Exception as e:
        sys.stderr.write("Error while parsing compl.log: %s\n" % str(e))
    # Periodic flush: print the collected stats and start over with empty
    # accumulators once `sendTimePeriod` seconds (default 60) have passed.
    if time() - lastSendTime > int(q.get('sendTimePeriod', 60)):
        stats2 = stats.copy()
        data2 = data.copy()
        initData()
        #send_monstats(data2)
        send_plotnik(stats2)
        lastSendTime = time()
# Final flush once the log source is exhausted.
#send_monstats(data)
send_plotnik(stats)
