#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, json, re, ConfigParser
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import time, strptime, mktime
from collections import defaultdict
from subprocess import Popen, PIPE
from socket import getfqdn
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from common import writelog, getRoute, getGroup4Host
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# NOTE: a `global` statement at module level has no effect; it only lists the
# names that the functions and the main loop below share.
global cfg, q, check_types, route, svc, prefx, data, LOG_TAIL

# Single-letter message type code from a log record -> counter name used in
# the statistics.
check_types = {
    'S': 'spam',
    'H': 'ham',
    'M': 'malic',
    'D': 'delivery'
}
# Host name: dot-separated labels (plain or punycode "xn--") ending in a TLD.
DOMAIN_RE = re.compile(r'(?:(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?|xn\-\-[a-z0-9-]+)\.)+(?:xn\-\-[a-z0-9-]+|[a-z]+)', re.I)
# E-mail address: local part followed by '@' and a DOMAIN_RE host name.
EMAIL_RE = re.compile(r'[\w\+\-\.=]+\@{0}'.format(DOMAIN_RE.pattern), re.I)
# Message-Id, with optional surrounding angle brackets.
MSGID_RE = re.compile(r'<?[a-zA-Z0-9_\@\.-]+>?')
# Set to a LogTail instance later in the script; None until then.
LOG_TAIL = None
cfg = ConfigParser.SafeConfigParser()
# Keep option names case-sensitive (ConfigParser lower-cases them by default).
cfg.optionxform = str
# Route comes from common.getRoute(); if that yields nothing, fall back to the
# second command-line argument and finally to 'in'.
route = getRoute()
if not route:
    route = sys.argv[2] if len(sys.argv) > 2 else 'in'
# Upper-cased route for anything other than 'in'/'out'. Computed from the route
# value BEFORE it is replaced by the group name below.
# NOTE(review): prefx is not read anywhere else in the visible source.
prefx = route.upper() if route != 'in' and route != 'out' else ''
# Strip a leading "mail_so_" (underscores optional) and a trailing "daemon"
# from the group name that common.getGroup4Host() returns for this host.
group = re.sub(r'^mail_?so_?(.*?)(?:daemon)?$', r'\1', getGroup4Host(getfqdn()))
# Name of the main statistics section: based on the group when one is known,
# otherwise on the route.
svc = "shortlog_%s" % route
if group:
    svc = "shortlog_%s" % group
else:
    group = route
# From here on `route` holds the lower-cased group (or the original route when
# no group was found).
route = group.lower()
# Config file path: first command-line argument or the default location.
# NOTE(review): 'WORKING_DIR' looks like a placeholder substituted at
# deployment time - confirm.
cfg_file = sys.argv[1] if len(sys.argv) > 1 else 'WORKING_DIR/so-shortlog-parser.ini'
cfg.read(cfg_file)
# `q` is the dict that send_monstats() serializes to JSON for the sender
# script. It starts from the [general] section of the config file.
q = dict(cfg.items('general'))
q['dataResult'] = {}
q['log'] = 'shortlog'
q['logData'] = {}
q['machine'] = getfqdn()
# Same rule as `prefx`, but applied to the group-derived route.
q['prefix'] = route.upper() if route != 'in' and route != 'out' else ''

def initData():
    """Replace the module-level ``data`` accumulator with an empty one.

    The accumulator is three levels deep: ``data[section][time][counter]``.
    Missing sections and times are created on first access and missing
    counters start at 0.
    """
    global data

    def _new_counters():
        # Innermost level: counter name -> int, defaulting to 0.
        return defaultdict(int)

    def _new_timeline():
        # Middle level: timestamp -> counters.
        return defaultdict(_new_counters)

    data = defaultdict(_new_timeline)

def send_monstats(data):
    """Flatten the collected counters into ``q`` and pipe ``q`` to the sender.

    ``data`` is a mapping ``{section: {time: {counter: value}}}``. Every
    ``{counter: value}`` dict gets a ``'time'`` key (the inner dicts of
    ``data`` are modified in place) and a copy of it is appended to
    ``q['dataResult'][section]``. The whole ``q`` dict is then serialized to
    JSON and written to the stdin of ``so_logdata_sender.py``.

    Anything the sender writes to stderr, and any exception raised while
    running it, is reported through ``LOG_TAIL.error``; nothing is raised to
    the caller.

    Rows are dropped from ``q['dataResult']`` only after a send that reported
    no error. Previously they were never dropped, so every call re-sent all
    earlier rows and the payload grew without bound. After a failed send the
    rows stay queued and go out again with the next call.
    """
    global LOG_TAIL
    results = q['dataResult']
    for section, per_time in data.items():
        rows = results.setdefault(section, [])
        for (t, counters) in per_time.items():
            counters['time'] = t
            rows.append(dict(counters))
    delivered = False
    try:
        # NOTE(review): shell=True with a one-element list runs the script
        # through /bin/sh; kept as is because the script path is fixed and no
        # external input reaches the command line.
        (output, err) = Popen(['WORKING_DIR/so_logdata_sender.py'], stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True).communicate(input=json.dumps(q))
        if err:
            LOG_TAIL.error("Error while sending parsed data: %s. Output: %s" % (err, output))
        else:
            delivered = True
    except Exception as e:
        LOG_TAIL.error("Exception while sending parsed data: %s" % str(e), True)
    if delivered:
        # Start the next payload from scratch so rows are not sent twice.
        q['dataResult'] = {}

def gotSignal(signum, frame):
    """Signal handler: send whatever has been collected, then exit.

    If the module-level ``data`` accumulator is non-empty it is passed to
    send_monstats(). The process then always terminates with exit status 1.
    ``signum`` and ``frame`` are supplied by the signal machinery and are
    not used.
    """
    global LOG_TAIL, data
    pending = data
    if pending:
        send_monstats(pending)
    raise SystemExit(1)

# Create the accumulator BEFORE installing any handler: gotSignal() reads the
# module-level `data`, which does not exist until initData() has run. With the
# handlers installed first, a signal arriving in between raised NameError
# inside the handler.
initData()
# SIGCHLD is ignored.
signal(SIGCHLD, SIG_IGN)
# Every other handled signal flushes the collected statistics and exits.
for _signum in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_signum, gotSignal)
LOG_TAIL = LogTail(cfg_file)
# Time of the last flush; the main loop compares it against the
# 'sendTimePeriod' setting.
lastSendTime = time()
# Sections that get a 'total' counter for every accepted record. The full
# section name is '<section>_<message type>_<route>'.
STAT_SECTIONS = ('errors', 'shortlog', 'geo', 'sender', 'checkup')

# (type code from the record's types field, section, counter name).
# When the code is present in a record the counter is incremented in
# '<section>_<message type>_<route>'.
TYPE_COUNTERS = (
    ('UN', 'shortlog', 'UNSUBSCR'),
    ('NU', 'shortlog', 'NOUNSUB'),
    ('PF', 'shortlog', 'PERSF'),
    ('BN', 'shortlog', 'BOUNCE'),
    ('OT', 'geo', 'OUTTR'),
    ('HD', 'sender', 'HID'),
    ('DL', 'sender', 'DLV'),
    ('PS', 'sender', 'PAYSENDER'),
    ('FM', 'sender', 'FREEMAIL'),
    ('V6', 'sender', 'IPV6'),
    ('P3', 'sender', 'POP3'),
    ('IM', 'sender', 'IMAP'),
    ('FW', 'sender', 'FWD'),
    ('DS', 'sender', 'DSL'),
    ('MZ', 'sender', 'MARKET'),
    ('ES', 'sender', 'ESHOP'),
    ('PD', 'sender', 'PDD'),
    ('PL', 'sender', 'PDDLOCAL'),
    ('YW', 'sender', 'YAWEB'),
    ('YS', 'sender', 'YASMTP'),
    ('ZY', 'checkup', 'ALLTRUSTED'),
    ('DN', 'checkup', 'DEVNULL'),
    ('DF', 'checkup', 'DTESTFWD'),
    ('DC', 'checkup', 'DTESTMAIL'),
    ('1K', 'checkup', '1K_RCVD'),
    ('SF', 'checkup', 'SHINFAIL'),
    ('FR', 'checkup', 'FRESH'),
    ('SP', 'checkup', 'SPFPASS'),
    ('SN', 'checkup', 'SPFFAIL'),
    ('DP', 'checkup', 'DKIMPASS'),
    ('DR', 'checkup', 'DOMROLL'),
    ('YP', 'checkup', 'YAPOP3'),
    ('YM', 'checkup', 'YAIMAP'),
    ('UF', 'checkup', 'UNSUBF'),
    ('LY', 'checkup', 'LISTYT'),
)

DIGITS_RE = re.compile(r'^\d+$')
TYPE_CODE_RE = re.compile(r'^[A-Z]$')


def _flag(t, et, name):
    """Count problem *name* for timestamp *t* and remember it in *et*."""
    data[svc][t][name] += 1
    et[name] = 1


def _is_blank(value):
    """Return True when a field is empty or is exactly the '-' placeholder."""
    return not (value and value != '-')


def _check_addresses(t, et, sender, from_addr):
    """Flag a missing or malformed envelope sender / From address."""
    if sender.strip() == '-' and from_addr.strip() == '-':
        _flag(t, et, 'errorNoSender')
        return
    if not EMAIL_RE.match(sender.strip()):
        _flag(t, et, 'errorNotValidSender')
    if not EMAIL_RE.match(from_addr.strip()):
        _flag(t, et, 'errorNotValidFrom')


def _check_type(t, et, code, record):
    """Count the message type for *code*; return its name or 'unknown'."""
    if code in check_types:
        name = check_types[code]
        data[svc][t][name] += 1
        return name
    _flag(t, et, 'errorNotValidType')
    LOG_TAIL.error('NotValidTypes for row: %s' % record)
    return 'unknown'


def _count_types(t, et, raw):
    """Count every '_'-separated code in *raw*.

    Returns ``(codes, notype)``: the non-empty codes found, and True when
    the field contains no '_' at all (which is flagged as 'errorNoTypes').
    """
    if '_' not in raw:
        _flag(t, et, 'errorNoTypes')
        return [], True
    codes = [code for code in raw.split('_') if code]
    for code in codes:
        data[svc][t][code] += 1
    return codes, False


def _check_geo(t, et, value):
    """Return the sender geo zone, or 'nozone' (and flag it) when blank."""
    if _is_blank(value):
        _flag(t, et, 'errorNoSenderGeo')
        return 'nozone'
    return value


# The record layout depends only on the route, which is fixed for the whole
# run, so the layout-specific indices are computed once.
is_out = route == 'out'
time_idx = 6 if is_out else 3       # field holding "dd.mm.YYYY HH:MM:SS"
type_idx = 3 if is_out else 2       # field holding the one-letter type code
required = 9 if is_out else 7       # leading fields that must be non-empty
min_len = 22 if is_out else 19      # a record must have MORE fields than this

for record in LOG_TAIL():
    # string example
    #root@catalogia-phrases-bs22i.yandex.ru  root@catalogia-phrases-bs22i.yandex.ru  H       02.05.2018 00:29:01     <20180501212901.2F6CF341F7D@catalogia-phrases-bs22i.yandex.ru>  _ZY_DF_1K_DP_F4 -       -       RU      -       -       -       bm-root@mail.yandex-team.ru_1120000000133132    iva1-0236-msk-iva-so-spdaemon-test-13140.gencfg-c.yandex.net    1120000000017159        jOPtdO2Q9q-T1PeD0H2     ru      'Cron Daemon'   'Cron <root@catalogia-phrases-bs22i> ip -6 r replace $(ip -6 r sh default | perl -lpe  s/mtu \d+/mtu '  firstmai;trust_4
    valid, fields, et = False, [], {}
    try:
        fields = record.split("\t")
        if len(fields) < 7 or not all(fields[:required]):
            LOG_TAIL.error('Unknown row format for row: %s' % record)
            continue
        if len(fields) > min_len:
            # Used as the bucket for 'errorNotValidDate' if parsing fails.
            t = time()
            try:
                t = int(mktime(strptime(fields[time_idx], "%d.%m.%Y %H:%M:%S")))
                if TYPE_CODE_RE.match(fields[type_idx]):
                    valid = True
                    fields[time_idx] = t
            except Exception as e:
                LOG_TAIL.error("Exception: %s" % str(e), True)
                data[svc][t]['errorNotValidDate'] += 1
                et['errorNotValidDate'] = 1
        else:
            LOG_TAIL.error("Strange line: %s" % record)
        if not valid:
            continue
    except Exception as e:
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
        # The record could not be validated, so skip it. Without this the
        # code below ran on unchecked fields and could raise again outside
        # any handler, terminating the process.
        continue

    t = fields[time_idx]
    if is_out:
        if fields[0].strip() == '-':
            _flag(t, et, 'errorNoSUID')
        elif not DIGITS_RE.match(fields[0]):
            _flag(t, et, 'errorNotValidSUID')
        _check_addresses(t, et, fields[1], fields[2])
        tp = _check_type(t, et, fields[3], record)
        if fields[4].strip() == '-':
            _flag(t, et, 'errorNoKarma')
        if fields[5].strip() == '-':
            _flag(t, et, 'errorNoCreationDate')
        else:
            try:
                strptime(fields[5].strip(), "%d.%m.%Y")
            except ValueError:
                _flag(t, et, 'errorNotValidCreationDate')
        if fields[7].strip() == '-':
            _flag(t, et, 'errorNoMessageId')
        elif not MSGID_RE.match(fields[7]):
            _flag(t, et, 'errorNotValidMessageId')
        types, notype = _count_types(t, et, fields[8])
        if _is_blank(fields[9]):
            _flag(t, et, 'errorNoSenderIP')
        if _is_blank(fields[10]):
            _flag(t, et, 'errorNoSenderHost')
        geo = _check_geo(t, et, fields[11])
        if _is_blank(fields[15]):
            _flag(t, et, 'errorNoRecipients')
    else:
        _check_addresses(t, et, fields[0], fields[1])
        tp = _check_type(t, et, fields[2], record)
        if fields[4] == '-':
            _flag(t, et, 'errorNoMessageId')
        elif not MSGID_RE.match(fields[4]):
            _flag(t, et, 'errorNotValidMessageId')
        # The codes themselves are collected here. The old code appended the
        # timestamp instead, so no per-code counter below ever matched on
        # this route.
        types, notype = _count_types(t, et, fields[5])
        if fields[6].strip() == '-':
            _flag(t, et, 'errorNoSenderIP')
        if _is_blank(fields[7]):
            _flag(t, et, 'errorNoSenderHost')
        geo = _check_geo(t, et, fields[8])
        if _is_blank(fields[9]):
            _flag(t, et, 'errorNo1stSenderIP')
        if _is_blank(fields[10]):
            _flag(t, et, 'errorNo1stSenderHost')
        if _is_blank(fields[11]):
            _flag(t, et, 'errorNo1stSenderGeo')
        if _is_blank(fields[12]) or '-_0' in fields[12].split(','):
            _flag(t, et, 'errorNoRecipients')
        if _is_blank(fields[13]):
            _flag(t, et, 'errorNoCheckServer')
        if _is_blank(fields[14]):
            _flag(t, et, 'errorNoUID')
        elif not all(DIGITS_RE.match(uid.strip()) for uid in fields[14].split(',')):
            _flag(t, et, 'errorNotValidUID')

    data[svc][t]['total'] += 1
    suffix = '%s_%s' % (tp, route)
    for section in STAT_SECTIONS:
        data['%s_%s' % (section, suffix)][t]['total'] += 1
    if notype or not types:
        data['shortlog_%s' % suffix][t]['NOTYPE'] += 1
    for name in et:
        data['errors_%s' % suffix][t][name] += 1
    data['geo_%s' % suffix][t][geo] += 1
    # Each counter is bumped at most once per record, even if a code repeats.
    seen = set(types)
    for code, section, counter in TYPE_COUNTERS:
        if code in seen:
            data['%s_%s' % (section, suffix)][t][counter] += 1

    if time() - lastSendTime > int(q.get('sendTimePeriod', 60)):
        # Swap in a fresh accumulator first, then send the finished one.
        data2 = data
        initData()
        send_monstats(data2)
        lastSendTime = time()

# The log source is exhausted: flush whatever is left.
send_monstats(data)
