#!/usr/bin/python -u
# -*- coding: utf-8 -*-

import sys
import re
import time


# add "1;2;3;" to test items (at the beginning of the line)
# Parsing rules, looked up as conditions[<daemon>][<postfix daemon>].
#
# The first level is keyed by the daemon family ("postfix"), the second by
# the individual Postfix daemon name ("qmgr", "cleanup", "smtpd", "lmtp",
# "smtp"). Each value is a list of rule dicts that postfix_line.parse() tries
# in order, stopping at the first regex that matches:
#   "regex"       - pattern matched against the message text that follows the
#                   "<daemon>[pid]: " prefix; every named group that takes
#                   part in the match becomes an output field.
#   "tskv_format" - value stored in the record's "tskv_format" field.
#   "tests"       - sample raw log lines for the rule; no code visible in
#                   this file reads this key.
# postfix_line.parse() additionally caches the compiled pattern in each rule
# dict under the key "regex_obj" the first time the rule is used.
conditions = {
    "postfix": {
        # Queue manager messages.
        "qmgr": [
            {
                "regex": '^(?P<session_id>[^:]+): from=(?P<from>[^,]+), size=(?P<size>[0-9]+), nrcpt=(?P<nrcpt>[0-9]+) \((?P<status>queue active)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov 15 15:39:24 forward20 postfix/qmgr[30300]: B8F961042079: from=<sk-glebov@yandex.ru>, size=64221, nrcpt=1 (queue active)"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): (?P<status>removed)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov 15 15:39:26 forward20 postfix/qmgr[30300]: B8F961042079: removed"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): from=(?P<from>[^,]+), status=(?P<status>expired), (?P<status_msg>.*)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Apr 13 00:11:29 forward10 postfix/qmgr[18002]: 32F3E10210A4: from=<vovk15@yandex.ru>, status=expired, returned to sender"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): (?P<status>(connect|disconnect) from) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                ]
            }
        ],
        # Cleanup daemon messages.
        "cleanup": [
            {
                "regex": '^(?P<session_id>[^:]+): message-id=<(?P<message_id>[^>]+)>$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov 15 15:39:24 forward20 postfix/cleanup[2260]: B8F961042079: message-id=<443161384515564@web25g.yandex.ru>"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): (?P<status>(connect|disconnect) from) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                ]
            }
        ],
        # SMTP server messages.
        "smtpd": [
            {
                "regex": '^(?P<session_id>[^:]+): client=(?P<client_fqdn>[a-z0-9\.-]+)\[(?P<client_ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov 15 15:39:24 forward20 postfix/smtpd[1972]: B8F961042079: client=web25g.yandex.ru[95.108.253.234]"
                ]
            },
            {
                # Unlike the other daemons, this connect/disconnect rule has
                # no leading session id.
                "regex": '^(?P<status>(connect|disconnect) from) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Feb 22 00:00:02 forward1m postfix/smtpd[28121]: disconnect from mxfront9m.mail.yandex.net[37.140.138.59]",
                    "Feb 22 00:00:07 forward1m postfix/smtpd[28121]: connect from web26m.yandex.ru[37.140.138.117]"
                ]
            }
        ],
        # LMTP delivery messages; separate rules cover the variants with and
        # without the "orig_to=" and "conn_use=" fields.
        "lmtp": [
            {
                "regex": '^(?P<session_id>[^:]+): (?P<status>(connect|disconnect) from) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                ]
            },
            #            {
            #                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>sent) \((?P<status_msg>[^\)]+)\)$',
            #                "tskv_format": 'mail-postfix-log',
            #                "tests": [
            #                    "Mar 16 03:32:05 mxcorp2 postfix/lmtp[20140]: 2619F640131: to=<narod-cron@mail.yandex-team.ru>, relay=mxbacks.yandex-team.ru[93.158.157.176]:25, delay=0.25, delays=0.06/0.01/0.04/0.13, dsn=2.0.0, status=sent (250 2.0.0 Ok: queued on mxbackcorp1g.mail.yandex.net as CxF1d4kGVR-W5kKjQfe)"
            #                    ]
            #                },
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>(sent|bounced|deferred)) \((?P<status_msg>.*)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Mar 16 03:50:02 mxbackcorp1e postfix/lmtp[811]: ED4D01E0067: to=<root-robot@mail.yandex-team.ru>, relay=127.0.0.1[127.0.0.1]:1234, delay=0.19, delays=0.14/0.01/0/0.04, dsn=4.5.0, status=deferred (host 127.0.0.1[127.0.0.1] said: 451 4.5.0 internal mail db error; o23eR4h7  (in reply to end of DATA command))"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), conn_use=(?P<conn_use>[0-9]+), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>(sent|bounced|deferred)) \((?P<status_msg>.*)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Oct 28 10:24:02 mxback2o postfix/lmtp[9342]: 5234012601C9: to=<zorin.s@gamma-steel.ru>, relay=127.0.0.1[127.0.0.1]:1234, conn_use=4, delay=0.62, delays=0.06/0/0/0.55, dsn=5.1.1, status=bounced (host 127.0.0.1[127.0.0.1] said: 554 5.1.1 Unknown user; BtS9T6Js 160440736725075807 611 (in reply to end of DATA command))"
                ]
            },
            #	    {
            #		"regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), conn_use=(?P<conn_use>[0-9]+), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>deferred) \((?P<status_msg>.*)\)$',
            #		"tskv_format": 'mail-postfix-log',
            #		"tests": [
            #		    "Nov  7 13:37:04 mxback1o postfix/lmtp[22317]: DC52EA20053: to=<zhanna-yanovska@yandex.ru>, relay=127.0.0.1[127.0.0.1]:1234, conn_use=12, delay=30, delays=20/0/0/10, dsn=4.5.0, status=deferred (host 127.0.0.1[127.0.0.1] said: 451 4.5.0 internal mail db error; GMOGO5PI 160440736725096645 1238 (in reply to end of DATA command))"
            #		    ]
            #		},
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), orig_to=(?P<orig_to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>(sent|bounced|deferred)) \((?P<status_msg>[^\)]+)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov  1 22:09:03 mxback8o postfix/lmtp[8437]: BFE7265E01AC: to=<mtan-ionina-fpb6W@yandex.ru>, orig_to=<tan-ionina@yandex.ru>, relay=127.0.0.1[127.0.0.1]:1234, delay=1.3, delays=0/0/0/1.3, dsn=2.0.0, status=sent (250 2.0.0 Ok; cFIlx8jl 160440736725077343 1283)"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), orig_to=(?P<orig_to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), conn_use=(?P<conn_use>[0-9]+), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>(sent|bounced|deferred)) \((?P<status_msg>[^\)]+)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Nov  1 22:09:03 mxback8o postfix/lmtp[8437]: BFE7265E01AC: to=<mtan-ionina-fpb6W@yandex.ru>, orig_to=<tan-ionina@yandex.ru>, relay=127.0.0.1[127.0.0.1]:1234, conn_use=10, delay=1.3, delays=0/0/0/1.3, dsn=2.0.0, status=sent (250 2.0.0 Ok; cFIlx8jl 160440736725077343 1283)"
                ]
            }
            #	    {
            #		"regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), conn_use=(?P<conn_use>[0-9]+), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>sent) \((?P<status_msg>.*)\)$',
            #		"tskv_format": 'mail-postfix-log',
            #		"tests": [
            #		    "Sep  7 09:52:03 mxback8h postfix/lmtp[7861]: ADF84201304: to=<akm302@yandex.ru>, relay=127.0.0.1[127.0.0.1]:1234, conn_use=20, delay=0.6, delays=0/0/0/0.6, dsn=2.0.0, status=sent (250 2.0.0 Ok; D9FMEDbL 2160000000036476745 none)"
            #		]
            #	    }
        ],
        # SMTP client messages; here "conn_use=" is an optional part of the
        # first two delivery rules instead of having rules of its own.
        "smtp": [
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+))(?:, conn_use=(?P<conn_use>[0-9]+))?, delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>(sent|deferred)) \((?P<status_msg>.*)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Feb 22 05:45:38 forward1m postfix/smtp[4521]: 5669A1220C8E: to=<sachkova@vetprom.ru>, relay=none, delay=13361, delays=13331/0.05/30/0, dsn=4.4.1, status=deferred (connect to mail.vetprom.ru[95.84.189.195]:25: Connection timed out)",
                    "Feb 22 00:00:03 forward1m postfix/smtp[25658]: D8B241220651: to=<root-passport@yandex-team.ru>, relay=mx-corp.yandex.ru[213.180.204.113]:25, delay=0.56, delays=0.28/0.1/0.1/0.07, dsn=2.0.0, status=sent (250 2.0.0 Ok: queued as 3fW3Rl23Kkz6NhFg)",
                    "Nov 15 15:39:24 forward20 postfix/smtp[30648]: B8F961042079: to=<chernobrovkin_64@mail.ru>, relay=mxs.mail.ru[94.100.176.20]:25, conn_use=3, delay=0.25, delays=0.13/0.03/0/0.08, dsn=2.0.0, status=sent (250 OK id=1VhHkC-0004Ib-SV)"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+))(?:, conn_use=(?P<conn_use>[0-9]+))?, delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>bounced) \((?P<status_msg>.*)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Feb 22 00:02:32 forward1m postfix/smtp[29406]: 5F6151221668: to=<-no-reply-@superjob.ru>, relay=mail.superjob.ru[91.206.146.2]:25, delay=2.5, delays=0.16/0.04/0.15/2.1, dsn=5.0.0, status=bounced (host mail.superjob.ru[91.206.146.2] said: 550 -no-reply-@superjob.ru unknown user account (in reply to RCPT TO command))"
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): (?P<status>(connect|disconnect) from) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                ]
            },
            {
                "regex": '^(?P<session_id>[^:]+): to=(?P<to>[^,]+), orig_to=(?P<orig_to>[^,]+), relay=(none|(?P<relay_fqdn>[a-zA-Z0-9-\.]+)\[(?P<relay_ip>[^\]]+)\]\:(?P<port>[0-9]+)), delay=(?P<delay>[0-9\.]+), delays=(?P<delays>([0-9\.]+\/){3}[0-9\.]+), dsn=(?P<dsn>[0-9\.]+\.[0-9\.]+\.[0-9\.]+), status=(?P<status>bounced) \((?P<status_msg>[^\)]+)\)$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Apr 13 00:00:12 forward10 postfix/smtp[15780]: 4C5E31020897: to=<postmaster@forward10.mail.yandex.net>, orig_to=<postmaster>, relay=none, delay=0.13, delays=0.08/0.04/0.01/0, dsn=5.4.6, status=bounced (mail for forward10.mail.yandex.net loops back to myself)"
                ]
            },
            {
                # Connection failure notices, which carry no session id.
                "regex": '^(?P<status>connect to) (?P<fqdn>[a-zA-Z0-9\.-]+)\[(?P<ip>[^\]]+)\]\:(?P<port>[0-9]+)\: (?P<status_msg>(Connection timed out|Connection refused))$',
                "tskv_format": 'mail-postfix-log',
                "tests": [
                    "Feb 22 00:00:20 forward1m postfix/smtp[25500]: connect to mail.rd-bud.com.ua[213.155.23.246]:25: Connection timed out",
                    "Feb 22 00:00:55 forward1m postfix/smtp[26666]: connect to fail.istra.net.ru[195.26.30.1]:25: Connection refused"
                ]
            }
        ]
    },
}

# Four-digit current year, evaluated once when the module is loaded. The log
# prefix carries no year, so this value is used when building timestamps.
year = time.strftime("%Y")

# Three-letter month abbreviation -> zero-padded two-digit month number.
month_map = dict(
    (abbreviation, "%02d" % number)
    for number, abbreviation in enumerate(
        (
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        ),
        1,
    )
)

# Characters that get a backslash put in front of them on output: newline,
# carriage return, NUL, backslash and double quote.
sub_regex = re.compile(
    r'([\n\r\0\\"])'
)

# Splits an input line into the "N;N;N;" magic prefix, the date and time
# parts, the host name, the raw daemon tag and the remaining message text.
extract_regex = re.compile(
    r'^(?P<magic>\d+;\d+;\d+;)'
    r'(?P<time_month>\w+)\s+'
    r'(?P<time_day>\d+)\s+'
    r'(?P<time_time>\d{2}:\d{2}:\d{2})\s'
    r'(?P<localhost>[\w-]+)\s'
    r'(?P<daemon_raw>[^\s:]+):\s'
    r'(?P<line>.*)'
)
def sanitize_str(line):
    """Return `line` with a backslash inserted before each character matched by `sub_regex`."""
    escaped = sub_regex.sub(r'\\\1', line)
    return escaped

class line_from_push_client(object):
    """Base class for all lines that carry the standard maillog prefix.

    Attributes:
        line: message text passed to the constructor.
        daemon_raw: raw daemon token passed to the constructor.
        tskv_format: set to "mail-log-unparsed" on construction.
        ret: dict of output fields (string keys and string values); must
            contain "magic" by the time to_push_client() is called.
        l: list of extra, already formatted fields appended to the output
            record; empty by default.
    """

    def __init__(self, ret, daemon_raw, line):
        self.line = line
        self.daemon_raw = daemon_raw
        self.tskv_format = "mail-log-unparsed"
        self.ret = ret
        self.l = []

    def partial_sanitize_fields(self):
        """Clean up the collected fields in place before they are printed.

        * Values of the "to", "rcpt", "from" and "for" fields lose any
          leading and trailing '<', '>', '"' and "'" characters.
        * A '=' inside a field name is escaped as '\\='.
        * A tab inside a value is replaced by the two characters '\\t'.
        """
        # Iterate over a snapshot of the keys: a field may be re-inserted
        # under an escaped name, and the dict must not be mutated while it is
        # being iterated.
        for key in list(self.ret):
            value = self.ret[key]
            if key in ("to", "rcpt", "from", "for"):
                value = value.strip('<>"\'')
            if '\t' in value:
                value = value.replace('\t', '\\t')
            escaped_key = key.replace('=', '\\=')
            if escaped_key != key:
                # Drop the old name; the value is stored under the escaped
                # name below, so nothing looks the old name up afterwards.
                del self.ret[key]
            self.ret[escaped_key] = value

    def to_push_client(self):
        """Write the record to stdout as a single line.

        The line is the "magic" field followed by "tskv" and a tab, then the
        remaining fields as tab-separated key=value pairs plus any entries of
        `self.l`; the whole string is passed through sanitize_str().

        "magic" is removed from `self.ret`, so calling this twice on the same
        object raises KeyError.
        """
        magic = self.ret.pop("magic") + "tskv\t"
        fields = [k + "=" + v for k, v in self.ret.items()] + self.l
        # A single write of text plus newline produces the same output as the
        # former print statement and is valid on both Python 2 and Python 3.
        sys.stdout.write(sanitize_str(magic + "\t".join(fields)) + "\n")
#        print magic + "\t".join([k + "=" + v for k, v in self.ret.iteritems()])


class postfix_line(line_from_push_client):
    """Class for all lines emitted by Postfix."""

    # Splits a daemon tag such as "postfix/smtpd[1972]" into the daemon
    # family, the Postfix daemon name and its pid. Compiled once for the class.
    _daemon_regex = re.compile(
        r'^(?P<daemon>postfix)/(?P<postfix_daemon>[\w-]+)\[(?P<postfix_daemon_pid>\d+)\]$'
    )

    def discover_daemon(self):
        """Add "daemon", "postfix_daemon" and "postfix_daemon_pid" to `self.ret`.

        Raises:
            ValueError: if `self.daemon_raw` is not of the form
                "postfix/<name>[<pid>]".
        """
        daemon_regex_obj = self._daemon_regex.match(self.daemon_raw)
        if daemon_regex_obj is None:
            raise ValueError("not a postfix daemon tag: %r" % (self.daemon_raw,))
        self.ret.update(daemon_regex_obj.groupdict())

    def parse(self):
        """Match `self.line` against the rules for this daemon and fill `self.ret`.

        The rules in conditions[daemon][postfix_daemon] are tried in order.
        On the first match "tskv_format" is set from the rule and every named
        group that took part in the match is copied into `self.ret`.
        discover_daemon() must have been called first, since the daemon names
        are read from `self.ret`.

        Raises:
            ValueError: if there are no rules for the daemon or none of them
                matches; such lines are rejected rather than emitted as
                unparsed records.
        """
        daemon = self.ret["daemon"]
        postfix_daemon = self.ret["postfix_daemon"]
        try:
            rules = conditions[daemon][postfix_daemon]
        except KeyError:
            raise ValueError("no parsing rules for %s/%s" % (daemon, postfix_daemon))

        for condition in rules:
            # Patterns are compiled on first use and cached in the rule dict.
            regex_obj = condition.get("regex_obj")
            if regex_obj is None:
                condition["regex_obj"] = regex_obj = re.compile(condition["regex"])
            match_obj = regex_obj.match(self.line)
            if match_obj is None:
                continue
            self.ret["tskv_format"] = condition["tskv_format"]
            for name, value in match_obj.groupdict().items():
                # Optional groups that did not take part in the match are None.
                if value is not None:
                    self.ret[name] = value
            return

        raise ValueError("no rule matches this %s/%s message" % (daemon, postfix_daemon))

def extract_prefix(line):
    """Separate the standard prefix and the daemon tag from the message text (for maillog lines).

    Args:
        line: one raw input line, starting with the "N;N;N;" magic prefix.

    Returns:
        A tuple (d, daemon_raw, message) where `d` is a dict with the keys
        "magic", "localhost" and "timestamp" ("YYYY-MM-DD HH:MM:SS"),
        `daemon_raw` is the raw daemon tag and `message` is the rest of the
        line.

    Raises:
        ValueError: if the line does not match `extract_regex`.
        KeyError: if the month abbreviation is not a key of `month_map`.
    """
    prefix_regex_obj = extract_regex.match(line)
    if prefix_regex_obj is None:
        raise ValueError("line does not carry the expected maillog prefix")

    d = prefix_regex_obj.groupdict()
    # The year field is absent from the log, so the module-level `year` is
    # inserted. NOTE: `year` is evaluated once at module load, so it is not
    # refreshed if the process keeps running into a new year.
    month = month_map[d.pop("time_month")]
    day = d.pop("time_day").zfill(2)
    d["timestamp"] = year + "-" + month + "-" + day + " " + d.pop("time_time")

    message = d.pop("line")
    daemon_raw = d.pop("daemon_raw")
    return (d, daemon_raw, message)

def line2obj(d, daemon_raw, line):
    """Select and return the line object for the corresponding daemon.

    Args:
        d: dict of fields already extracted from the line prefix.
        daemon_raw: raw daemon tag from the prefix.
        line: message text that follows the prefix.

    Returns:
        A `postfix_line` when `daemon_raw` starts with "postfix".

    Raises:
        ValueError: for any other daemon; only Postfix is supported.
    """
    if not daemon_raw.startswith("postfix"):
        raise ValueError("unsupported daemon: %r" % (daemon_raw,))
    return postfix_line(d, daemon_raw, line)

def main():
    """Read log lines from stdin and print one record per line that parses.

    Each line goes through prefix extraction, daemon discovery, rule
    matching and field clean-up before it is written out. A line that fails
    at any step is skipped without any output.
    """
    for line in sys.stdin:
        try:
            record = line2obj(*extract_prefix(line))
            record.discover_daemon()
            record.parse()
            record.partial_sanitize_fields()
            record.to_push_client()
        except Exception:
            # Best-effort filter: drop the line and carry on. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit stop the process.
            continue


if __name__ == "__main__":
    main()