#!/usr/bin/python2
#-*- coding: utf-8 -*-
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, re, time, ConfigParser, base64
import urllib
import urllib2
from subprocess import check_call, STDOUT
from traceback import format_exception
from email.header import decode_header
from email.utils import mktime_tz, parsedate_tz
from base64 import urlsafe_b64encode
from quopri import decodestring
from tempfile import mkstemp

# Number of attempts the HTTP helpers make before giving up.
RETRY_COUNT = 3

# Queue id of the message being processed.  parseLetter() resets and fills
# it; it is initialised here so that readers such as fillFields() never hit
# an undefined name.  (A module-level ``global`` statement has no effect.)
QID = ''

cfg = ConfigParser.SafeConfigParser()
cfg.optionxform = str  # keep option names case-sensitive
# NOTE(review): 'WORKING_DIR' looks like an unexpanded install-time
# placeholder -- confirm the intended default path.
cfg_file = sys.argv[2] if len(sys.argv) > 2 else 'WORKING_DIR/fbl.conf'
cfg.read(cfg_file)

LOGFILE = cfg.get('general', 'errorsLogFile') if cfg.has_option('general', 'errorsLogFile') else '/logs/fbl.log'

# Marker header that parseLetter() adds to its message buffer after Date.
IYComplaintTag = "IY-Complaint: 1\n"
IYComplaintLen = len(IYComplaintTag)
# Generic host name / e-mail address patterns.
DOMAIN_RE = re.compile(r'(?:(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?|xn\-\-[a-z0-9-]+)\.)+(?:xn\-\-[a-z0-9-]+|[a-z]+)', re.I)
EMAIL_RE = re.compile(r'[\w\+\-\.]+\@{0}'.format(DOMAIN_RE.pattern), re.I)
# Host names / addresses under the yandex, ya, narod and yandex-team domains.
YANDEX_DOMAIN_RE = re.compile(r'(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)*\b(?:yandex|ya|narod|yandex-team)\.(?:[a-z]{2,4})', re.I)
YANDEX_EMAIL_RE = re.compile(r'[\w\+\-\.]+\@{0}'.format(YANDEX_DOMAIN_RE.pattern), re.I)
# Captures (sending host, queue id) from a Received header written by a
# *.mail.yandex.net / *.cmail.yandex.net host.
QID_RE = re.compile(r'from (\S+) \(\S+ \[[\d:\.a-f]+\]\)\s+by [a-z\-]+[0-9][0-9a-z-]+?\.c?mail\.yandex\.net.+?with E?SMTPS?A? id (\w{10}-\w{8});', re.S)

def get_traceback():
    """Return the traceback of the exception being handled as indented text.

    Every chunk produced by ``traceback.format_exception`` is stripped,
    prefixed with a tab and terminated with a newline.  A chunk that cannot
    be appended (for example because of a str/unicode mix) is skipped, so
    the function stays best-effort and never raises from a logging path.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Deliberately best-effort, but unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            continue
    return tb

def writelog(msg, isTB = False):
    """Append a timestamped message to LOGFILE.

    msg  -- text to log; nothing is written when it is empty.
    isTB -- when true, the traceback of the exception currently being
            handled is appended to the message.

    If the log file cannot be written, the error and the original message
    are printed to stderr instead; the function itself never raises.
    """
    if not msg:
        return
    tb = "\n"
    try:
        if isTB:
            tb += get_traceback()
        # ``with`` guarantees the handle is closed even if write() fails.
        with open(LOGFILE, 'a') as f:
            f.write(time.strftime("[%Y-%m-%d %H:%M:%S]: ") + msg + tb)
    except Exception as e:
        sys.stderr.write("Writelog error: %s.%s\n" % (str(e), get_traceback()))
        sys.stderr.write(time.strftime("[%Y-%m-%d %H:%M:%S]: ") + msg + tb + "\n")
        sys.stderr.flush()
def decode_helper(m):
    """Decode one RFC 2047 encoded-word matched by decodeHeader().

    m -- regex match whose groups are (charset, encoding letter, payload).

    Returns the decoded text.  'Q' payloads are decoded as quoted-printable
    (header flavour, '_' means space); any other encoding letter is treated
    as base64.  The declared charset is honoured, falling back to UTF-8 when
    Python does not know it; undecodable bytes are dropped.  An empty match
    yields u''.  A word with no encoding letter, or with a payload that
    cannot be decoded, is returned unchanged.
    """
    if not m or not m.group(1) or not m.group(3):
        return u''
    charset, encoding, payload = m.group(1), m.group(2), m.group(3)
    if not encoding:
        # The caller's pattern may leave the encoding letter unmatched.
        return m.group(0)
    try:
        if encoding.lower() == 'q':
            raw = decodestring(payload, header=True)
        else:
            # Restore padding that some senders omit, then *decode*.
            raw = base64.b64decode(payload + '=' * (-len(payload) % 4))
    except (TypeError, ValueError):
        # Python 2 raises TypeError, Python 3 binascii.Error (a ValueError).
        return m.group(0)
    try:
        return raw.decode(charset, 'ignore')
    except LookupError:
        return raw.decode('utf-8', 'ignore')

def decodeHeader(s):
    """Decode RFC 2047 encoded-words in a header value and unfold it.

    Each ``=?charset?X?payload?=`` token is replaced by the result of
    decode_helper(); afterwards every line break, together with at most one
    following whitespace character, is removed.
    """
    # The payload is any run of characters other than '?' and whitespace, so
    # Q-encoded words containing '=XX' or '_' are recognised; the encoding
    # letter is accepted in either case.
    s1 = re.sub(r'=\?([A-Za-z0-9_.-]+)\?([QqBb])\?([^?\s]+)\?=', decode_helper, s)
    s1 = re.sub(r'\r?\n\s?', r'', s1)
    return s1

def getAddresses(s):
    """Split an address header value into display text and addresses.

    Returns a tuple ``(labels, emails)``: ``labels`` is what remains of *s*
    after the addresses, quotes and commas are removed, passed through
    decodeHeader(); ``emails`` is the addresses found, joined with ', '
    (an empty string when there are none).
    """
    found = re.findall(u'<?\s*([^\s<>,]+@[^\s<>,]+)\s*>?', s)
    without_addresses = re.sub(u'\s*<?\s*[^\s<>,]+@[^\s<>,]+\s*>?', u'', s)
    display = re.sub(u'[\"\',]', u'', without_addresses)
    # Joining an empty list already yields '', so no special case is needed.
    return decodeHeader(display), ', '.join(found)

def get(url):
    """Fetch *url* with HTTP GET, retrying up to RETRY_COUNT times.

    Returns the response body, or "" when every attempt failed.  Failures
    are written to the log via writelog() and never raised.
    """
    for attempt in range(1, RETRY_COUNT + 1):
        try:
            f = urllib2.urlopen(url)
            if not f:
                writelog('GET request #%d response is empty!' % attempt)
                continue
            try:
                return f.read()
            finally:
                f.close()
        # HTTPError is a subclass of URLError, so it has to be caught first;
        # otherwise this handler can never run.
        except urllib2.HTTPError as e:
            writelog('HTTP GET request (attempt #%d) failed (code=%s): %s\n' % (attempt, e.code, e.reason), True)
        except urllib2.URLError as e:
            writelog('HTTP GET request (attempt #%d) failed: %s\n' % (attempt, e.reason), True)
        except Exception as e:
            writelog('HTTP GET request #%d failed: %s\n' % (attempt, str(e)), True)
    return ""

def post(url, data = '', headers = None):
    """Send *data* to *url* with HTTP POST, retrying up to RETRY_COUNT times.

    url     -- target URL.
    data    -- request body.
    headers -- optional dict of extra request headers.

    Returns the response body, or "" when every attempt failed.  Failures
    are written to the log via writelog() and never raised.
    """
    if headers is None:
        # Avoid sharing one mutable default dict between calls.
        headers = {}
    for attempt in range(1, RETRY_COUNT + 1):
        try:
            r = urllib2.Request(url = url, data = data, headers = headers)
            f = urllib2.urlopen(r)
            if not f:
                writelog('POST request #%d response is empty!' % attempt)
                continue
            try:
                return f.read()
            finally:
                f.close()
        # HTTPError is a subclass of URLError, so it has to be caught first;
        # otherwise this handler can never run.
        except urllib2.HTTPError as e:
            writelog('HTTP POST request (attempt #%d) failed (code=%s): %s\n' % (attempt, e.code, e.reason), True)
        except urllib2.URLError as e:
            writelog('HTTP POST request (attempt #%d) failed: %s\n' % (attempt, e.reason), True)
        except Exception as e:
            writelog('HTTP POST request #%d failed: %s\n' % (attempt, str(e)), True)
    return ""

def getMsg(entity, conttype):
    """Extract an embedded message from one MIME entity.

    entity   -- text of the entity.
    conttype -- Content-Type value of the enclosing letter.

    Returns ``(message, header)``: the embedded message from its first
    non-blank character to the end, and its header part (everything before
    the first blank line).  ``('', '')`` is returned when nothing suitable
    is found or the content type is neither multipart nor message/rfc822.
    """
    nothing = ('', '')
    if conttype.startswith('multipart'):
        is_blank = re.match(r'^\s*$', entity)
        has_mess_marker = re.search(r'<b>mess:</b>\s', entity, re.I|re.S)
        declares_rfc822 = re.search(r'^Content-Type:\s*message/rfc822\s*$', entity, re.I|re.M)
        if is_blank or has_mess_marker or not declares_rfc822:
            return nothing
        # Skip the entity's own header block, then capture the message.
        found = re.match(r'^\S+.+?\s*\n\s*\n\s*((\S+.*?)\s*\n\s*\n\s*\S+.*)$', entity.lstrip(), re.I|re.S)
    elif conttype.startswith('message/rfc822'):
        found = re.match(r'^\s*((\S+.*?)\s*\n\s*\n\s*\S+.*)$', entity, re.I|re.S)
    else:
        return nothing
    if found is None:
        return nothing
    return found.group(1), found.group(2)

def parseLetter(sourceStream):
    """Split a raw e-mail into MIME entities and collect key header values.

    The stream is read line by line.  In the header section folded lines are
    joined to their header and the Date, Content-Type, From, To and Received
    headers are inspected.  After the first blank line the body is split on
    the boundary taken from Content-Type, or kept as one entity when there
    is no boundary.  Every line is also copied, right-stripped, into a
    temporary file created in the configured ``tempFileDir`` (default /tmp).

    Side effects: resets the module-level ``QID`` and sets it to the queue
    id of a matching Received header; closes ``sourceStream``.

    Returns ``(entities, origFields)`` where ``entities`` is a list of body
    part strings and ``origFields`` maps 'from', 'to', 'date' and
    'content-type' to the values found ('' when absent).
    """
    global QID

    def rf(m):
        # Drop the matched host (group 2), keep the delimiters around it.
        return '%s%s' % (m.group(1) or '', m.group(3) or '')

    # Read the configuration first so a bad value cannot leak a temp file.
    # Passing a default through ``vars=`` would override the configured
    # value, hence the explicit has_option() checks.
    tmp_dir = cfg.get('general', 'tempFileDir') if cfg.has_option('general', 'tempFileDir') else '/tmp'
    # getint(): comparing the raw option string with an int never limits.
    max_size = cfg.getint('general', 'maxMsgSize') if cfg.has_option('general', 'maxMsgSize') else 64 * 1024

    is_header, buf, nbuf, boundarySep = 1, '', 0, ''
    f_out, tmp_filename = mkstemp(dir=tmp_dir, text=True)
    SO_DOMAIN = tag = QID = s = s1 = s2 = ''
    origFields, entities = {'from': '', 'to': '', 'date': '', 'content-type': ''}, []
    fh_out = os.fdopen(f_out, 'wt')
    try:
        for line in sourceStream:
            fh_out.write(line.rstrip() + '\n')
            if is_header:
                if line.startswith('IY-Complaint: ') or re.match(r'^From \w+', line):
                    continue
                m = re.match(r'^\s+(\S+.*?)\s*$', line)
                if m:
                    # Folded continuation of the header being collected.
                    s += "\n\t%s" % m.group(1)
                else:
                    # A new header (or the blank line) starts: the previous
                    # header is complete, so evaluate it now.
                    if tag:
                        if tag == 'date':
                            origFields[tag] = s
                            buf += IYComplaintTag
                            nbuf += IYComplaintLen
                        elif tag == 'content-type':
                            origFields[tag] = s
                            m = re.search(r'\bboundary\s*=\s*([\"\'])([^\"\']*?)\1', s, re.I)
                            boundarySep = m.group(2) if m else ''
                            if not boundarySep:
                                # Unquoted form; stop at ';' so a following
                                # parameter separator is not captured.
                                m = re.search(r'\bboundary\s*=\s*([^\s;]+)', s, re.I)
                                boundarySep = m.group(1) if m else ''
                            boundarySep = re.sub(r'\r|\n', r'', boundarySep)
                        elif tag in ('from', 'to') and not origFields[tag]:
                            m = re.search(r'\b({0})'.format(EMAIL_RE.pattern), s, re.I)
                            if m:
                                origFields[tag] = m.group(1)
                        elif tag == 'received':
                            s1 = s2 = s
                            # flags must be passed by keyword: the fourth
                            # positional argument of re.sub() is ``count``.
                            s2 = re.sub(r'([\s\(]+)(%s(?:\s\(?\[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\]\)?)?)([;:\)]?\s+)' % YANDEX_DOMAIN_RE.pattern, rf, s2, flags=re.I)
                            s2 = re.sub(r'(\s+<?)({0})(>?;?\s+)'.format(YANDEX_DOMAIN_RE.pattern), rf, s2, flags=re.I)
                            if re.search(r'by\s+{0}\b'.format(YANDEX_DOMAIN_RE.pattern), s1, re.I):
                                # NOTE(review): SO_DOMAIN is computed but not
                                # returned or stored anywhere.
                                m = re.search(r'from\s+.+?({0}\s+\[[0-9\.\:]+\])'.format(DOMAIN_RE.pattern), s2, re.I)
                                if m:
                                    SO_DOMAIN = m.group(1)
                                else:
                                    m = re.search(r'from\s+({0})\b'.format(DOMAIN_RE.pattern), s2, re.I)
                                    if m:
                                        SO_DOMAIN = m.group(1)
                                m = QID_RE.search(s1)
                                if m:
                                    SO_DOMAIN, QID = m.group(1), m.group(2)
                    m = re.match(r'^([\w\-]+)\s*:\s+(.*?)\s*$', line, re.I|re.M)
                    if m:
                        tag, s = m.group(1).lower(), m.group(2)
                    elif re.match(r'^\s*$', line):
                        # Blank line: end of the header section.
                        is_header, tag = 0, ''
                buf += line
                nbuf += len(line)
                continue
            # NOTE(review): buf holds the size-limited copy of the message
            # but is not returned or stored anywhere.
            if nbuf <= max_size:
                buf += line
                nbuf += len(line)
            if boundarySep:
                if line.startswith('--') and line.find(boundarySep) >= 0:
                    # Boundary line: start a new entity.
                    entities.append('')
                    continue
                if entities:
                    entities[-1] += line
            else:
                if not entities:
                    entities.append('')
                entities[0] += line
    finally:
        sourceStream.close()
        fh_out.close()
    return entities, origFields

def get_ids_by_email(login, bb_host_type):
    """Look up the suid and uid of *login* in the blackbox service.

    login        -- account login to query.
    bb_host_type -- index into the host list below; a false value means 0.

    Returns ``(suid, uid)`` as strings; each is '' when it is not present
    in the response (including when the request itself failed).
    """
    if not bb_host_type:
        bb_host_type = 0
    BB_Hosts = ['blackbox-mail.yandex.net', 'blackbox.yandex-team.ru']
    # urlopen() rejects a URL without a scheme, so one must be supplied.
    # NOTE(review): plain http is assumed here -- confirm against the service.
    # The login is percent-encoded because it is derived from message headers.
    buf = get('http://%s/blackbox?method=userinfo&login=%s&sid=smtp&userip=127.0.0.1&dbfields=subscription.suid.-&emails=getall' % (BB_Hosts[bb_host_type], urllib.quote(login, safe='')))
    m = re.search(r'<dbfield id="subscription.suid.-">(\d+)<\/dbfield>', buf)
    suid = m.group(1) if m else ''
    m = re.search(r'^<uid.*?>(\d+)</uid>', buf, re.M)
    uid = m.group(1) if m else ''
    return suid, uid

def fillFields(entities, origFields):
    """Build the complaint field dict from the parsed letter.

    entities   -- list of body part strings, as returned by parseLetter().
    origFields -- dict with the 'from', 'to', 'date' and 'content-type'
                  values of the outer letter.

    Returns a dict.  'mail' and 'body' hold the urlsafe-base64 header and
    full text of the first embedded message found by getMsg(); 'data' is the
    Unix time of the outer Date header (absent when it cannot be parsed);
    'unixtime', 'queueid' and 'login' are always set; the remaining keys
    depend on the recipient address in ``origFields['to']``.
    """
    global QID
    fields = {}
    # Initialised so that an empty entity list does not leave them unbound.
    body = header = ''
    for entity in entities:
        m = re.search(r'^User-Agent:\s*(.*)', entity, re.I|re.M)
        if m and re.search(r'^Content-Type: message/feedback-report', entity, re.M|re.I):
            fields['user-agent'] = m.group(1).strip()
        body, header = getMsg(entity, origFields['content-type'])
        if header:
            break
    fields['mail'], fields['body'] = urlsafe_b64encode(header), urlsafe_b64encode(body)

    # Parse the Date header with the e-mail library instead of handing an
    # untrusted header value to a shell command.
    date_hdr = ' '.join(origFields['date'].split())
    try:
        parsed = parsedate_tz(date_hdr) if date_hdr else None
        if parsed is None:
            raise ValueError('unrecognised date %r' % date_hdr)
        fields['data'] = int(mktime_tz(parsed))
    except Exception as e:
        writelog('Exception while parsing of Date: header: %s' % str(e), True)
    fields['unixtime'] = time.time()

    fromaddr = qid = mfrm = ''
    # Anchored and greedy: a trailing lazy group would always capture ''.
    m = re.search(r'^From:[ \t]*(.*)$', header, re.I|re.M)
    if m:
        fromaddr = getAddresses(m.group(1))[1]
    m = QID_RE.search(header)
    if m:
        qid = m.group(2)
    fields['queueid'] = qid if qid else (QID if QID else '')

    # Prefer a Yandex address from From:, then from Return-Path:.  The
    # address may be wrapped in <...>, and matching is case-insensitive like
    # the compiled YANDEX_EMAIL_RE itself.
    m = re.search(r'^From\s*:\s+.*?\b({0})>?\s*$'.format(YANDEX_EMAIL_RE.pattern), header, re.I|re.M)
    if m and m.group(1):
        mfrm = m.group(1)
    else:
        m = re.search(r'^Return-Path\s*:\s+.*?\b({0})>?\s*$'.format(YANDEX_EMAIL_RE.pattern), header, re.I|re.M)
        if m and m.group(1):
            mfrm = m.group(1)
    fields['login'] = mfrm if mfrm else fromaddr
    if fields['login']:
        fields['login'] = re.sub(r'\@\S+$', r'', fields['login'])

    if origFields['to'] == 'fbl-arf@yandex.ru':
        fields.setdefault('source', 'fblin')
        fields['provider'] = origFields['from']
    else:
        m = re.search(r'exchange-(spam|ham)-reports\@yandex-team\.ru', origFields['to'])
        if m:
            fields['type'] = 'antifoo' if m.group(1) == 'ham' else 'foo'
            if fields['login']:
                suid, uid = get_ids_by_email(fields['login'], 1)
                # Store only real ids, so that callers which fill in
                # defaults for missing keys still do so.
                if suid:
                    fields['suid'] = suid
                if uid:
                    fields['uid'] = uid
            else:
                # 'login' is always present by now, so test for emptiness.
                fields['login'] = '999Exchange'
            fields.setdefault('folder', 'exchange')
            fields.setdefault('source', 'exchange')
    return fields

def makeQuery(fields):
    """Build the complaint request for *fields*.

    fields -- dict of complaint fields; missing optional keys are filled
              with placeholder defaults in place.

    Returns ``(url, data)``: the target URL built from the 'complaintsHost'
    and 'complaintsUri' options, and the request data made of the URI, a
    '?' and the '&'-joined ``key=value`` pairs.  Returns None when 'mail'
    or 'body' is missing or empty.
    """
    if not fields.get('mail') or not fields.get('body'):
        return

    def enc(value):
        # Percent-encode a value.  '=' stays literal so urlsafe-base64
        # payloads are sent exactly as produced.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        return urllib.quote(str(value), safe='=')

    defaults = (
        ('stid', '100100999_99999_100100999_400999999999'),
        ('suid', '999999999999999'),
        ('uid', '999999999999999'),
        ('karma', '0'),
        ('karma_status', '0'),
        ('seen', 'yes'),
        ('type', 'foo'),
        ('ip', '1.1.1.1'),
    )
    for key, value in defaults:
        fields.setdefault(key, value)

    host = cfg.get('general', 'complaintsHost')
    uri = cfg.get('general', 'complaintsUri')
    url = "%s%s?source=%s&login=%s" % (host, uri, enc(fields.get('source', '')), enc(fields.get('login', '')))
    # Iterate over items (key AND value), sorted for a reproducible order.
    pairs = '&'.join("{0}={1}".format(k, enc(v)) for k, v in sorted(fields.items()))
    return url, '%s?%s' % (uri, pairs)

def send2complserver(q):
    """POST the complaint described by *q* to the complaints server.

    q -- dict of complaint fields; missing optional keys are filled with
         placeholder defaults in place.

    Nothing is sent when 'mail' or 'body' is missing or empty.  The target
    is built from the 'complaintsHost' and 'complaintsUri' options; the
    request data is the URI, a '?' and the '&'-joined ``key=value`` pairs.
    """
    if not q.get('mail') or not q.get('body'):
        return

    def enc(value):
        # Percent-encode a value.  '=' stays literal so urlsafe-base64
        # payloads are sent exactly as produced.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        return urllib.quote(str(value), safe='=')

    defaults = (
        ('stid', '100100999_99999_100100999_400999999999'),
        ('suid', '999999999999999'),
        ('uid', '999999999999999'),
        ('karma', '0'),
        ('karma_status', '0'),
        ('seen', 'yes'),
        ('type', 'foo'),
        ('ip', '1.1.1.1'),
    )
    for key, value in defaults:
        q.setdefault(key, value)

    host = cfg.get('general', 'complaintsHost')
    uri = cfg.get('general', 'complaintsUri')
    # .get(): 'source' is only set for recognised recipient addresses.
    url = "%s%s?source=%s&login=%s" % (host, uri, enc(q.get('source', '')), enc(q.get('login', '')))
    # Iterate over items (key AND value), sorted for a reproducible order.
    pairs = '&'.join("{0}={1}".format(k, enc(v)) for k, v in sorted(q.items()))
    post(url, '%s?%s' % (uri, pairs))

def main():
    """Process one letter: parse it, build the fields and send the complaint.

    The letter is read from the file named by the first command-line
    argument, or from standard input when the argument is absent or '-'.
    Exits with status 1 if the file cannot be opened.
    """
    source_path = sys.argv[1] if len(sys.argv) > 1 else '-'
    if source_path != '-':
        try:
            stream = open(source_path)
        except Exception as err:
            writelog('Error while opening file "%s": %s' % (source_path, str(err)), True)
            sys.exit(1)
    else:
        stream = sys.stdin
    entities, origFields = parseLetter(stream)
    send2complserver(fillFields(entities, origFields))

if __name__ == "__main__":
    main()
