#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, sys, re
from time import time, localtime, strptime, mktime
from socket import getfqdn
from yasmagent.core.modules.so.common import *
from yasmagent.core.providers import _FileScanner
from yasmagent.core.utils import log

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

class Compllog(_FileScanner):
    """Scanner for the Iex-Proxy complaint log that emits ``so_compl_*`` signals.

    Every log record is a tab-separated line. The fields this class reads are
    (names are taken from the signals emitted below):

        0   envelope sender           8   sender IP
        1   "From" address            9   sender host
        2   resolution type (S/H/D/M) 10  sender geo
        4   complaint date            11  UID (digits only)
        5   '_'-separated type codes  12  queue ID
        6   login                     15  spam flag (verdict)
        7   message ID

    Fields 3, 13 and 14 are not inspected here.
    """
    # string example for compl-log from Iex-Proxy (https://wiki.yandex-team.ru/AntiSpam/Complaints/#kompl-logkotoryjjpishetsjaviex-proxy):
    # noreply@faberlic.com    noreply@faberlic.com    S       08.03.2021 14:50:17     12.03.2021 23:02:26     _AU_ZT_FA_F5_ZU g-tamila95@yandex.ru    <5c8e9bdcb3aeef7b52088c29d42cb9c7@localhost.localdomain>        21    3.247.192.253 pa.faberlic.com RU Moscow       229096330       njXwOQ83xt-oGBmdpxq     Faberlic.com    You have a new message   ham

    # Path of the parser's own error log (filled from the "errorlog_path"
    # option in setUp) and the lazily opened file object for it.
    errorlog_path = None
    errorlog = None
    DOMAIN_RE = re.compile(r'(?:(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?|xn\-\-[a-z0-9-]+)\.)+(?:xn\-\-[a-z0-9-]+|[a-z]+)', re.I)
    EMAIL_RE = re.compile(r'[\w\+\-\.=]+\@{0}'.format(DOMAIN_RE.pattern), re.I)
    MSGID_RE = re.compile(r'<?[a-zA-Z0-9_\@\.-]+>?')
    # Precompiled once instead of being re-looked-up for every record.
    _RESOLUTION_RE = re.compile(r'^[A-Z]$')
    _UID_RE = re.compile(r'^\d+$')
    # Layout of the complaint date (field 4).
    _DATE_FORMAT = "%d.%m.%Y %H:%M:%S"
    # Records whose complaint date is older than this many seconds are dropped.
    _MAX_AGE_SEC = 10
    route = 'compl'
    # Common prefix of every signal name emitted by this class.
    svc = "so_%s" % route
    # Resolution type letter (field 2) -> name used inside signal names.
    check_types = { 'S': 'spam', 'H': 'ham', 'D': 'delivery', 'M': 'malic' }
    # NOTE(review): presumably consumed by the yasmagent framework - confirm.
    allowed_os_list = ['Linux']

    def __init__(self, searchinstance, ctx):
        """Initialise the base scanner and set the scan limits.

        NOTE(review): ``maxiter`` and ``block_size`` are presumably read by
        ``_FileScanner``; their exact meaning is not visible in this file.
        """
        _FileScanner.__init__(self, searchinstance, ctx)
        self.maxiter = 10000
        self.block_size = 65536

    def _bumpSignal(self, name):
        """Add 1 to the signal ``<svc>_<name>_dmmm``."""
        self.updateValue('%s_%s_dmmm' % (self.svc, name), 1)

    @staticmethod
    def _isMissing(value):
        """Return True when a field is empty or holds the '-' placeholder."""
        return not value or value == '-'

    def writelog(self, msg, isTB=False):
        """Report *msg* through ``error()``.

        When ``errorlog_path`` is configured the message goes to that file,
        which is opened in append mode on first use; if it cannot be opened,
        ``sys.stderr`` is used instead. Without a configured path ``error()``
        is called with its default destination.

        :param msg: text to report.
        :param isTB: forwarded to ``error()`` unchanged.
        """
        if not self.errorlog_path:
            error(msg, isTB)
            return
        if not self.errorlog:
            try:
                self.errorlog = open(self.errorlog_path, 'at')
            except Exception:
                # Best effort: logging must not break parsing, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except.
                self.errorlog = sys.stderr
        error(msg, isTB, '', self.errorlog)

    def getFilePath(self):
        """Return the path of the complaint log to scan.

        The "compllog_path" option is used when it is set and the file
        exists; otherwise the host-specific default under /u0 is returned.
        """
        path = self.ctx.getOption("compllog_path")
        if path and os.path.exists(path):
            return path
        return "/u0/{}/logs/compl.log".format(getfqdn())

    def setUp(self):
        """Run the base set-up and read "errorlog_path" unless already set."""
        super(Compllog, self).setUp()
        if not self.errorlog_path:
            self.errorlog_path = self.ctx.getOption("errorlog_path")

    def _prepareRecord(self, record):
        """Split a raw log line and decide whether it should be processed.

        A record is valid when its first seven fields are non-empty, it has
        more than 15 fields, and its complaint date (field 4) parses and is
        not older than ``_MAX_AGE_SEC`` seconds. An unexpected resolution
        type (field 2) is only logged; it does not invalidate the record.

        :param record: one line of the complaint log.
        :return: tuple ``(valid, fields)`` where *fields* is the line split
                 on tabs.
        """
        fields = record.split("\t")
        if not all(fields[:7]):
            self.writelog('Compllog: Unknown row format for row: %s' % record)
            return False, fields
        if len(fields) <= 15:
            self.writelog("Strange line: %s" % record)
            log.info("Strange line: %s" % record)
            return False, fields
        if not self._RESOLUTION_RE.match(fields[2]):
            self.writelog('Compllog: not valid sp-daemon resolution type for row: %s' % record)
        # Only the date conversion is guarded, so that unrelated failures are
        # not miscounted as an invalid date.
        try:
            stamp = int(mktime(strptime(fields[4], self._DATE_FORMAT)))
        except (ValueError, OverflowError):
            self._bumpSignal('errorNotValidDate')
            return False, fields
        if stamp < int(time() - self._MAX_AGE_SEC):
            self.writelog("Compllog: obsolete data!")
            return False, fields
        return True, fields

    def _process(self, fields):
        """Emit the signals describing one parsed complaint record.

        :param fields: tab-split record; indexes up to 15 are read, so the
                       list must hold at least 16 items (``_prepareRecord``
                       only marks such records as valid).
        """
        bump = self._bumpSignal
        missing = self._isMissing

        # Sender / From addresses.
        if fields[0].strip() == '-' and fields[1] == '-':
            bump('errorNoSender')
        else:
            if not self.EMAIL_RE.match(fields[0].strip()):
                bump('errorNotValidSender')
            if not self.EMAIL_RE.match(fields[1].strip()):
                bump('errorNotValidFrom')

        # Resolution type; 'unknown' is used in per-type signal names when
        # the letter is not one of check_types.
        st = self.check_types.get(fields[2])
        if st is not None:
            bump(st)
        else:
            st = 'unknown'
            bump('errorNotValidType')
            self.writelog('NotValidType for record data: %s' % ' '.join(fields))

        if fields[4] == '-':
            bump('errorNoCmplDate')

        # Type codes: every non-empty code is counted overall and per
        # resolution type.
        types = []
        if '_' in fields[5]:
            for code in fields[5].split('_'):
                if code:
                    bump(code)
                    bump('%s_%s' % (st, code))
                    types.append(code)
        else:
            bump('errorNoTypes')
        if not types:
            bump('NOTYPE')
            bump('%s_NOTYPE' % st)

        if fields[6] == '-':
            bump('errorNoLogin')

        if missing(fields[7]):
            bump('errorNoMessageId')
        elif not self.MSGID_RE.match(fields[7]):
            bump('errorNotValidMessageId')

        if missing(fields[8]):
            bump('errorNoSenderIP')
        if missing(fields[9]):
            bump('errorNoSenderHost')

        if missing(fields[10]):
            bump('errorNoSenderGeo')
        else:
            bump('geo_%s' % fields[10])

        if missing(fields[11]):
            bump('errorNoUID')
        elif not self._UID_RE.match(fields[11]):
            bump('errorNotValidUID')

        if missing(fields[12]):
            bump('errorNoQueueID')

        # Spam flag: map the raw value to a verdict label. The order of the
        # checks matters - "ham" with a DL type and "spam" with an ML type
        # are reclassified before the plain spam/ham cases.
        flag = fields[15]
        if missing(flag):
            bump('errorNoSpamFlag')
        else:
            if (flag == 'ham' and 'DL' in types) or flag.startswith('dlv') or flag == 'DLVR':
                verdict = 'DLVR'
            elif flag == 'spam' and 'ML' in types:
                verdict = 'MALIC'
            elif flag == 'spam' or flag == 'YES':
                verdict = 'SPAM'
            elif flag == 'ham' or flag == 'NO':
                verdict = 'HAM'
            else:
                verdict = 'UNKNOWN'
            bump('%s_%s' % (st, verdict))
            bump(verdict)

        bump('total')
