#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, re, json
sys.path.insert(0, 'WORKING_DIR')
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import strftime, sleep
from collections import defaultdict
from socket import gethostname
from common import getUUID, sendEmail
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# NOTE: a `global` statement at module level has no effect. LOG_TAIL is bound
# in the `__main__` section at the bottom of the file and read by the
# functions below.
global LOG_TAIL

# Pause (in seconds) made after each send attempt: 1/80 s, i.e. roughly 80
# messages per second at most.
SENDING_TIMEOUT = 1.0 / 80
# Address the abuse reports are mailed to.
KASPER_EMAIL = "ya2kas_mal@spam.kaspersky.com"
# Only referenced by the commented-out "copy to robot" code in processMsgInfo.
ROBOT_EMAIL = "robot-mailspam@yandex-team.ru"
# Used both as the From: header of the report and as the sender passed to sendEmail.
YANDEX_FROM = "Yandex Mail AntiSpam Feedback <fbl-noreply@yandex.ru>"
# Not referenced anywhere in the code visible in this file.
PARSE_PERIOD = 600
# Log to tail: first command-line argument, or a per-host default path.
LOGFILE = sys.argv[1] if len(sys.argv) > 1 else '/u0/{}/logs/kaspersky.log'.format(gethostname())
# Placeholders substituted for masked mailbox addresses and host names.
REPLACE_STR = "<removed@email.xxx>"
REPLACE_DOMAIN_STR = "<removed>"
# Only referenced by the commented-out relevance filter in generateMsgEntity.
KASPER1_RE = re.compile(r'\blua[ -]profiles\b', re.I)
KASPER2_RE = re.compile(r'\bluacore\b', re.I)
# Error log path handed to LogTail.
# NOTE(review): "LOGS_DIR" looks like a placeholder substituted at deploy time - confirm.
ERRORLOG = "LOGS_DIR/malic-log-parser-errors.log"
# "Name: value" header line; group 1 is the name, group 2 the value.
HEADER_RE = re.compile(r'^([\w\-]+)\s*:\s+(.*)$', re.I)
# Folded header continuation: leading whitespace followed by content (group 1).
HEADER_CONTINUE_RE = re.compile(r'^\s+(\S+.*)$')
# Empty or whitespace-only line; marks the end of the header section.
EMPTY_LINE_RE = re.compile(r'^\s*$')
# Loose match for an IP literal: any run of digits, ':', '.' and the letters a-f.
IP_RE = re.compile(r"[\d:\.a-f]+", re.I)
# Host name whose registrable part is yandex / ya / narod / yandex-team,
# optionally preceded by sub-domain labels.
YANDEX_DOMAIN_RE = re.compile(r"(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)*\b(?:yandex|ya|narod|yandex-team)\.(?:[a-z]{2,4}|yandex)", re.I)
# Mailbox address at one of the domains matched by YANDEX_DOMAIN_RE.
YANDEX_EMAIL_RE = re.compile(r"[\w\+\-\.]+\@{0}".format(YANDEX_DOMAIN_RE.pattern), re.I)


def _maskRecipients(value, recipients):
    """Replace every literal occurrence of a recipient address in value with REPLACE_STR."""
    for recipient in recipients:
        value = value.replace(recipient, REPLACE_STR)
    return value


def _scrubHeaderValue(name, value, recipients):
    """Return value with Yandex hosts, Yandex mailboxes and recipients masked.

    name is the lower-cased header name. Only Received, Authentication-Results,
    Return-Path and To/Cc/Bcc are rewritten; any other value is returned as is.
    """
    domainRepl = r'\1{0}\3'.format(REPLACE_DOMAIN_STR)
    emailRepl = r'\1{0}\3'.format(REPLACE_STR)
    if name == 'received':
        value = re.sub(r'(?i)([\s\(]+)(%s(?:\s\(?\[%s\]\)?)?)([;:\)]?\s+)' % (YANDEX_DOMAIN_RE.pattern, IP_RE.pattern), domainRepl, value)
        value = re.sub(r'(?i)(\s+<?)(%s)(>?;?\s+)' % YANDEX_EMAIL_RE.pattern, emailRepl, value)
        return _maskRecipients(value, recipients)
    if name == 'authentication-results':
        value = re.sub(r'(?i)([\s\(]*)(%s)([;:\)]?\s+)' % YANDEX_DOMAIN_RE.pattern, domainRepl, value)
        value = re.sub(r'(?i)(\s+)(\S*?\b%s\b\S*?)(\s+)' % YANDEX_DOMAIN_RE.pattern, domainRepl, value)
        return _maskRecipients(value, recipients)
    if name == 'return-path':
        value = re.sub(r'(?i)(\s*<?)(%s)(>?;?\s*)' % YANDEX_EMAIL_RE.pattern, emailRepl, value)
        value = re.sub(r'(?i)(\s*)(\S*?\b%s\b\S*?)(\s+)' % YANDEX_DOMAIN_RE.pattern, domainRepl, value)
        return _maskRecipients(value, recipients)
    if name in ('to', 'cc', 'bcc'):
        # Recipient lists are dropped wholesale rather than scrubbed.
        return REPLACE_STR
    return value


def _parseRawEmail(rawEmail):
    """Split raw message text into a header list and the body text.

    Returns (headers, body). headers is a list of
    (lowerCasedName, nameAsWritten, value) tuples in message order; folded
    continuation lines are joined to the value with "\\n\\t". body starts with
    the "\\n" of the blank separator line and holds every following line
    terminated by "\\n".
    """
    headers, bodyParts = [], []
    inHeaders = True
    name = originalName = value = ""
    for line in re.split(r'\r?\n', rawEmail):
        if inHeaders:
            folded = HEADER_CONTINUE_RE.match(line)
            if folded:
                value += "\n\t" + folded.group(1)
                continue
            if name:
                headers.append((name, originalName, value))
                # Clear the pending header so that a following non-header
                # line cannot make it be stored a second time.
                name = originalName = value = ""
            started = HEADER_RE.match(line)
            if started:
                originalName, value = started.group(1), started.group(2)
                name = originalName.lower()
                continue
            if EMPTY_LINE_RE.match(line):
                bodyParts.append("\n")
                inHeaders = False
                continue
            # A line that is neither a header, a continuation nor blank is
            # copied to the body while header parsing goes on.
        bodyParts.append(line + "\n")
    if inHeaders and name:
        # The text ended without a blank separator line: keep the last header.
        headers.append((name, originalName, value))
    return headers, "".join(bodyParts)


def generateMsgEntity(msgInfo):
    """Build the anonymised copy of the original message for the report.

    msgInfo must provide:
      "rcpt"  - iterable of strings; the first whitespace-separated token of
                each one is taken as a recipient address (blank entries are
                skipped);
      "email" - the raw message text (headers, blank line, body).

    Headers whose name starts with "x-yandex" are left out of the result;
    the value of X-Yandex-QueueId is returned separately. Yandex host names,
    Yandex mailbox addresses and the recipient addresses are masked in the
    Received, Authentication-Results and Return-Path headers, and To/Cc/Bcc
    values are replaced entirely. Each header keeps the name spelling it had
    in the message.

    Returns a tuple (entity, headerFrom, subject, queueid), where entity is
    the rebuilt message text, headerFrom the value of the From header,
    subject "Fwd: " followed by the Subject value, and queueid the value of
    X-Yandex-QueueId ("" when absent).

    Raises KeyError when msgInfo lacks "rcpt" or "email".
    """
    recipients = []
    for rcpt in msgInfo["rcpt"]:
        tokens = rcpt.split()
        if tokens:
            recipients.append(tokens[0])

    parsedHeaders, msgBody = _parseRawEmail(msgInfo["email"])

    # NOTE: an earlier revision filtered messages here with KASPER1_RE /
    # KASPER2_RE and returned an empty entity for irrelevant ones; that check
    # was disabled, so every message is reported.
    subject, headerFrom, queueid = "Fwd: ", "", ""
    headerLines = []
    for name, originalName, value in parsedHeaders:
        if name.startswith("x-yandex"):
            if name == "x-yandex-queueid":
                queueid = value
            continue
        if name == 'subject':
            subject += value
        elif name == 'from':
            headerFrom = value
        else:
            value = _scrubHeaderValue(name, value, recipients)
        headerLines.append(originalName + ': ' + value + "\n")
    return ("".join(headerLines) + "\n" + msgBody), headerFrom, subject, queueid


def generateMsg(msgInfo, toEmail):
    """Render the complete multipart/report abuse message as text.

    msgInfo is the parsed log record; besides the keys consumed by
    generateMsgEntity it must contain "mailfrom", whose first
    whitespace-separated token is reported as the sender. toEmail is written
    to the To: header of the report.

    Returns the message text, or "" (after logging through LOG_TAIL) when
    generateMsgEntity yields an empty entity.

    Raises KeyError / IndexError when "mailfrom" is missing or blank.
    """
    # RFC 5322 dates carry the month as an abbreviation ("Jan"), not a number.
    # NOTE(review): %a and %b follow the process locale - presumably C/English here.
    curDate = strftime("%a, %d %b %Y %H:%M:%S %z")
    boundary = "=_NextPart_%s" % getUUID('.')
    entity, headerFrom, subject, queueid = generateMsgEntity(msgInfo)
    if not entity:
        LOG_TAIL.error("Message with queueid=%s is not relevant" % queueid)
        return ""
    return """Content-Type: multipart/report; boundary="{0}"; report-type="feedback-report"
MIME-Version: 1.0
Date: {1}
Message-ID: <{2}@fbl-out.yandex.ru>
From: {3}
To: {4}
Subject: {5}

This is a multi-part message in MIME format.


--{0}
MIME-Version: 1.0
Content-Disposition: inline
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="US-ASCII"
Date: {1}

This is an email abuse report for an email message received from {6} on {1}

--{0}
MIME-Version: 1.0
Content-Disposition: inline
Content-Transfer-Encoding: 7bit
Content-Type: message/feedback-report

Feedback-Type: abuse
User-Agent: Yandex-Mail-Report/1.0
Version: 0.1
Original-Mail-From: {7}
Original-Rcpt-To: {8}
Received-Date: {1}
Reported-Domain: {6}

--{0}
Content-Type: message/rfc822;
Content-Disposition: attachment;
    filename="{9}.eml"

{10}


--{0}--
""".format(boundary, curDate, getUUID(''), YANDEX_FROM, toEmail, subject, msgInfo["mailfrom"].split()[0], headerFrom, REPLACE_STR, getUUID('_'), entity)


def processMsgInfo(msgInfo):
    """Generate the abuse report for one log record and mail it to Kaspersky.

    Nothing is sent when generateMsg returns an empty message. A truthy
    result from sendEmail is treated as a failure description and written to
    the error log. After every send attempt, successful or not, the function
    sleeps for SENDING_TIMEOUT seconds.

    A disabled earlier variant also mailed a copy of the report to
    ROBOT_EMAIL before the Kaspersky one.
    """
    report = generateMsg(msgInfo, KASPER_EMAIL)
    if not report:
        return
    failure = sendEmail(
        report,
        fromaddr=YANDEX_FROM,
        toaddr=KASPER_EMAIL,
        log_fh=LOG_TAIL.ERRORLOG,
    )
    if failure:
        LOG_TAIL.error("Sending to Kaspersky failed: %s" % failure)
    sleep(SENDING_TIMEOUT)


if __name__ == '__main__':
    # Script entry point: follow LOGFILE through LogTail and turn every
    # non-blank record (one JSON document per record) into an abuse report.
    # LOG_TAIL is bound at module scope here, so no `global` statement is
    # needed for the functions above to see it.
    LOG_TAIL = LogTail(None, log=LOGFILE, id="malic-for-kaspersky", errorlog=ERRORLOG)
    for record in LOG_TAIL():
        try:
            payload = record.strip()
            if not payload:
                continue
            # Undecodable bytes are dropped instead of aborting the record.
            msgInfo = json.loads(unicode(payload, "utf8", "ignore"))
            processMsgInfo(msgInfo)
        except Exception as e:
            # One bad record must not stop the tailing loop: log and go on.
            LOG_TAIL.error("Processing of message's info failed: %s" % str(e), True)
