#!/usr/bin/env python
# *-* encoding: utf8 *-*
''' Script that reads mbox mail, merges messages with similar subjects and writes them to a standard log. '''

import fcntl
import hashlib
import mailbox
import optparse
import os
import pwd
import grp
import sys
import time

from os import stat
from os import chown

USERS_MBOX = "ppc,mod,bmclient"
DIRECTORY_MBOX = "/var/spool/mail"
DIRECTORY_MAIL = "/var/mail"
LOG_PATH = "/var/log/yandex/cron-mailer"

class Mailer:
    def __init__(self, fname, debug=False):
        self.uid, self.gtid = stat(fname).st_uid, stat(fname).st_gid #получаем uid и gid, чтобы  восстановить после чистки привелегии на файл 
        self.filename = fname
        self.name = os.path.basename(fname)
        self.debug = debug
        self.fix_permitions()
        self.subhashs = dict() #{<md5sum subject>: [<position letter>, <position letter>]}
        self.mbox = mailbox.mbox(self.filename, create=False) 
        self.msgs = dict() #{<md5sum subject>: {'subject': <name>, 'data': [{'date': [<date1>, <date2>], 'body': <text body>}]}, <md5sum subject>: ...}  
        self.total_aggregate = 0 #количество писем попавших в уже созданную тему
        self.total_mails = 0 #общее количество обработанных писем
        self.queue_deleted_mails = list() #очередь писем на удаление

    def fix_permitions(self):
        for dir_name in [DIRECTORY_MAIL, DIRECTORY_MBOX]:
            for user in USERS_MBOX.split(','):
                path = os.path.join(dir_name, user)
                try:
                    uid = pwd.getpwnam(user).pw_uid
                    gid = grp.getgrnam("mail").gr_gid
                except Exception as _:
                    continue
                if os.path.exists(path):
                    os.chown(path, uid, gid)
        return

    def get_subhash(self):
        for position, message in enumerate(self.mbox):
            subject = message.get('Subject', 'EMPTY')
            subhash = hashlib.md5(subject).hexdigest()
            if self.subhashs.has_key(subhash):
                self.subhashs[subhash].append(position)
                self.total_aggregate += 1
                continue
            self.subhashs.setdefault(subhash, [position,])
        if self.debug: print self.subhashs
        return

    def remove_mails(self):
        if len(self.queue_deleted_mails) == 0: return
        for i in self.queue_deleted_mails:
            self.mbox.discard(i)
            self.mbox.flush()
        return

    def run_group_mails(self, clear_mailbox=False):
        self.mbox.lock()
        self.get_subhash()
        for subhash in self.subhashs:
            positions = self.subhashs.get(subhash)
            subject = None
            bodyhashs = dict()
            for number in positions:
                mail = self.mbox[number]
                if not subject:  
                    self.msgs[subhash] = {'subject': mail.get('Subject', 'EMPTY').rstrip(),
                                          'data': list()}
                body = mail.get_payload()
                if type(body) is not str:
                    self.queue_deleted_mails.append(number)
                    continue

                if len(body) > 4096:
                    b1hash = hashlib.md5(body[:2048] + body[-2048:]).hexdigest()
                else:
                    b1hash = hashlib.md5(body).hexdigest()
                date = mail.get('Date') 
                if bodyhashs.has_key(b1hash):
                    bodyhashs[b1hash]['dates'].append(date)
                else:
                    bodyhashs[b1hash] = {'dates': [date,], 'body': body}
                self.total_mails += 1
                self.queue_deleted_mails.append(number)
            self.msgs[subhash]['data'].extend(bodyhashs.values())
            self.msgs[subhash]['len'] = len(positions)
        if clear_mailbox: self.remove_mails()
        self.mbox.close()
        chown(self.filename, self.uid, self.gtid)
        return

    def __call__(self):
        print "#START ITERATION", time.ctime()
        print "#TOTAL MAILS", self.total_mails
        print "#AGGREGATE MAILS ", self.total_aggregate
        for subhash in self.msgs:
            print "##START SUBJECT", subhash
            print "##SUBJECT\n", self.msgs[subhash]['subject']
            print "##COUNT MAILS", self.msgs[subhash]['len']
            for group in self.msgs[subhash]['data']:
                print ""
                print "###DATETIME\n", '\n'.join(group['dates'])
                print "###BODY\n", group['body'].rstrip()
            print ""
            print "##END SUBJECT", subhash
            print ""
        print "#END ITERATION"
        print "\n"

def callList(option, opt, value, parser):
    value = [ os.path.basename(v) for v in value.split(',') ]
    setattr(parser.values, option.dest, value)

if __name__ == '__main__':

    usage = "usage: {0}".format(sys.argv[0])
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-d", "--debug",
                      action="store_true", dest="debug",
                      help="run debug mode(default: False)")
    parser.add_option("-e", "--directory-mailbox",
                      action="store", dest="dir_mbox", default=DIRECTORY_MBOX,
                      help="path mail directory(default: {0})".format(DIRECTORY_MBOX))
    parser.add_option("-u", "--user-mailbox", 
                      action="callback", dest="users_mbox", default=USERS_MBOX,
                      type="string", callback=callList,
                      help="users for parse mailbox(defaut: {0})".format(USERS_MBOX))
    parser.add_option("-p", "--print",
                      action="store_true", dest="print_stdout",
                      help="print stdout in console(default: False)")
    parser.add_option("-c", "--clear-mailbox",
                      action="store_true", dest="clear_mailbox",
                      help="remove mails in mailbox after aggregate(default: False)")
    parser.add_option("-l", "--log-path",
                      action="store", dest="log_path", default=LOG_PATH,
                      help="path for save mails(default: {0})".format(LOG_PATH))
    (opts, args) = parser.parse_args()
    
    # избегаем повторного запуска при работающей копии
    lock_file = open('/tmp/{0}.lock'.format(os.path.basename(sys.argv[0])), 'w')
    fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)

    # смотрим список почты mbox для пользователей opts.users_mbox(USERS_MBOX)
    mbox_list = [ os.path.join(opts.dir_mbox, i) for i in os.listdir(opts.dir_mbox) 
            if not i.endswith('.lock') and i in opts.users_mbox ]
    mbox_list = [ i for i in mbox_list if os.path.isfile(i) ]
    if opts.debug:
        print mbox_list
    
    if not mbox_list:
        raise ValueError("Dont found mailbox files in {0}. Exit". format(opts.dir_mbox))

    # проверяем наличие дирректории для логов opts.log_path(LOG_PATH)
    if not os.path.exists(opts.log_path):
        os.mkdir(opts.log_path, 0755)

    for mbox in mbox_list:
        try:
            mx = Mailer(mbox, debug=opts.debug)
            mx.run_group_mails(clear_mailbox=opts.clear_mailbox)

            if not opts.print_stdout:
                tmp = sys.stdout
                name_log_file = '.'.join([mx.name, 'log'])
                path_log_file = os.path.join(opts.log_path, name_log_file)
                with open(path_log_file, 'aw') as f1le:
                    sys.stdout = f1le
                    mx()
                sys.stdout = tmp
            else:
                mx()

        except Exception as err:
            print >> sys.stderr, "ERROR {1}: {0}".format(err, os.path.basename(sys.argv[0]))
            if opts.debug: raise

    lock_file.close()
