#!/usr/bin/python
# -*- coding: UTF-8 -*-

import argparse
import time
from datetime import datetime

from mail.tools.common.postfix import (get_queue, remove_messages, get_domain, is_from_deferred,
                                       has_single_rcpt, is_from_mailer_daemon)


# Blacklisted mail domains. is_toxic_sender_or_rcpt() checks the sender's and
# the first recipient's domain (as returned by get_domain) against this set by
# exact membership, and find_trash_messages() marks every matching
# single-recipient deferred message for removal without further conditions.
TOXIC_DOMAINS = {
    "60843.simplecloud.ru",
    "adanaapart.com",
    "anextour.com",
    "bebistar74.ru",
    "cancercentersofia.ru",
    "celciuman.cloudns.cx",
    "cloudwebidea.com",
    "codec.com.tr",
    "company.com",
    "darersh.ru",
    "dekilapola.cloudns.cx",
    "email.com",
    "example.com",
    "isbank.com.tr",
    "isogd.pro",
    "itc-labs.ru",
    "jelastic.lunacloud.com",
    "karmawall.com",
    "kritlab.ru",
    "lawattor.ru",
    "localhost.com",
    "mail1.wu.ems-infics.com",
    "mail2.wu.ems-infics.com",
    "mail3.msndr.com",
    "mail3.smscentre.com",
    "mail.orb.ru",
    "makarenko.by",
    "minxy.ru",
    "navaroman.cloudns.cx",
    "plexgames.com",
    "pochta.ru",
    "privatefx.pro",
    "qdlhsy.com",
    "quenukimal.cloudns.cx",
    "racoono.ru",
    "rastenieman.cloudns.cx",
    "rickalon.cloudns.cx",
    "rtjyuk7ykerewbwef3wef.jelastic.dogado.eu",
    "russiangram.com",
    "sirafim.cloudns.cx",
    "tbs.travel",
    "tester.ru",
    "test.kolobox.shvetsov.tech",
    "undeliverable.masterhost.ru",
    "unionstudent.ru",
    "webmasterset.ru",
    "xd-design.msk.ru",
    "yndex.ru"
}


def is_toxic_sender_or_rcpt(item):
    """Tell whether the sender's or the first recipient's domain is blacklisted.

    Both domains are obtained through ``get_domain`` and looked up in
    ``TOXIC_DOMAINS``. A falsy domain never counts as a match.

    :param item: queue entry with a ``"sender"`` string and a non-empty
        ``"recipients"`` list whose first element has an ``"address"``.
    :return: a truthy value when either domain is listed, a falsy one otherwise.
    """
    from_domain = get_domain(item["sender"])
    to_domain = get_domain(item["recipients"][0]["address"])

    sender_hit = from_domain and from_domain in TOXIC_DOMAINS
    if sender_hit:
        return sender_hit

    # The sender did not match, so the verdict rests on the recipient alone.
    return to_domain and to_domain in TOXIC_DOMAINS


def is_queued_long_enough(item):
    """Return True when the message arrived more than 24 hours ago.

    ``item["arrival_time"]`` is converted with ``int()`` and treated as a Unix
    timestamp (seconds since the epoch).

    The age is computed directly in epoch seconds. Subtracting a naive UTC
    datetime (``utcfromtimestamp``) from a naive local one (``datetime.now``)
    would skew the result by the host's UTC offset on any non-UTC machine.

    :param item: queue entry carrying an ``"arrival_time"`` value.
    :return: ``True`` if the message is older than one day, else ``False``.
    :raises ValueError: if ``arrival_time`` cannot be converted to ``int``.
    """
    day = 60 * 60 * 24
    arrival_time = int(item["arrival_time"])
    # Both operands are epoch seconds, so the difference is timezone-independent.
    return time.time() - arrival_time > day


def is_rcpt_no_reply(item):
    """Check whether the first recipient is a "do not reply" style mailbox.

    The match is a case-sensitive prefix test on the full address, so the
    local part has to be exactly one of the known no-reply spellings.

    :param item: queue entry whose first recipient has an ``"address"`` string.
    :return: ``True`` if the address starts with a known no-reply prefix.
    """
    no_reply_prefixes = ("noreply@", "no-reply@", "no_reply@", "bezotveta@", "bezotweta@")
    address = item["recipients"][0]["address"]
    return any(address.startswith(prefix) for prefix in no_reply_prefixes)


def is_rcpt_spam(item):
    """Check whether the first recipient is a known spammer's bounce address.

    For a DSN the recipient is the envelope sender ("mail from") of the
    message that bounced, which is why the recipient address is inspected.

    ..notes: MAILDLV-2877

    :param item: queue entry whose first recipient has an ``"address"`` string.
    :return: ``True`` for ``bounce-...`` addresses under the listed domains.
    """
    address = item["recipients"][0]["address"]
    if not address.startswith("bounce-"):
        return False
    return address.endswith((".mebeliani.ru", ".meaningbar.ru"))


def is_network_problem(item):
    """Check whether delivery to the first recipient failed at connection level.

    :param item: queue entry whose first recipient has a ``"delay_reason"``
        string.
    :return: ``True`` if the delay reason reports a refused or timed-out
        connection to port 25.
    """
    reason = item["recipients"][0]["delay_reason"]
    connection_failures = (":25: Connection refused", ":25: Connection timed out")
    return any(marker in reason for marker in connection_failures)


def find_trash_messages(queue_data):
    """Select the queue entries that should be removed.

    :param queue_data: iterable of queue entries.
    :return: list of the entries accepted by ``_is_trash``, in input order.
    """
    return [entry for entry in queue_data if _is_trash(entry)]


def _is_trash(item):
    """Apply the removal rules to one queue entry.

    Only deferred messages with a single recipient are considered. Such a
    message is trash when any of the following holds:

    * its sender or recipient domain is blacklisted;
    * it comes from the mailer daemon, goes to a no-reply mailbox and has
      been queued for long enough;
    * it comes from the mailer daemon, goes to a known spam bounce address
      and delivery fails at the network level.
    """
    if not is_from_deferred(item) or not has_single_rcpt(item):
        return False

    return bool(
        is_toxic_sender_or_rcpt(item)
        or (is_from_mailer_daemon(item)
            and is_rcpt_no_reply(item)
            and is_queued_long_enough(item))
        or (is_from_mailer_daemon(item)
            and is_rcpt_spam(item)
            and is_network_problem(item))
    )


def main(dry_run):
    """Find trash messages in the deferred queue and remove (or just list) them.

    :param dry_run: when true, print the queue id, sender and first recipient
        record of every candidate and remove nothing; otherwise remove the
        candidates and print how many there were.
    """
    items = get_queue("deferred")
    messages = find_trash_messages(items)
    if dry_run:
        for item in messages:
            # A single pre-formatted argument prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (item["queue_id"], item["sender"], item["recipients"][0]))
        return

    remove_messages([m["queue_id"] for m in messages])
    print("Removed total: %s" % len(messages))


if __name__ == "__main__":
    # Command-line entry point: the only option is --dryrun, which is passed
    # straight to main() as its dry_run flag.
    cli = argparse.ArgumentParser(description="Clean postfix queue from toxic messages")
    cli.add_argument(
        "--dryrun",
        action='store_true',
        help="Just prints items to remove, nothing is removed"
    )
    main(cli.parse_args().dryrun)
