#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, sys, re, psycopg2
from traceback import format_exception
from collections import defaultdict
from datetime import datetime
from socket import gethostname
from paysender_rules import parse_rule
from common import get_traceback
from so_log_tail import LogTail
from mongodb import getPGCredentials, getPGdb

# File that log() appends its timestamped messages to.
ERRFILE = '/var/log/so-logs/psndr.log'
# Input log handed to LogTail; the path embeds this machine's host name.
LOGFILE = '/u0/%s/logs/shortdlv_in.log' % gethostname()
# Connection settings handed to getPGdb().
PG = dict(
    host="psndrdb01e.db.yandex.net,psndrdb01f.db.yandex.net,psndrdb01i.db.yandex.net",
    port=6432,
    db="psndrdb",
    user="psndr"
)

def log(msg, isTB = False):
    """Append one timestamped line to ERRFILE.

    msg  -- text to record.
    isTB -- when true, the result of get_traceback() is appended right
            after msg on the same record.

    Ordinary errors never escape: if the file cannot be opened or written,
    a short notice is sent to stderr instead.
    """
    try:
        # `with` guarantees the handle is closed even if write() fails.
        with open(ERRFILE, 'at') as errfile:
            stamp = datetime.today().strftime("%Y-%m-%d %H:%M:%S")
            tail = get_traceback() if isTB else ''
            errfile.write("[%s] %s%s\n" % (stamp, msg, tail))
    except Exception as e:
        sys.stderr.write("Exception while writing log: %s\n" % str(e))

class Info:
    """Message counters kept for one aggregation key.

    The zeros are class-level defaults.  `obj.ham += 1` rebinds the name on
    the instance, so each instance counts independently of the others.
    """
    # Totals per verdict.
    ham = spam = 0
    # Subsets of the totals that were flagged as personal.
    personal_ham = personal_spam = 0

def search_paysender(re_dict, s):
    """Return the name of the first pattern in re_dict that matches s.

    re_dict -- mapping of name -> compiled regex; iterated with iteritems().
    s       -- string to test; a false value or '-' means "no data".

    A pattern counts as a hit only when its first group captured a non-empty
    string.  The returned name has every "___" turned back into "-".
    Returns '' when nothing matches.
    """
    if not s or s == '-':
        return ''
    for key, compiled in re_dict.iteritems():
        found = compiled.search(s)
        if not found:
            continue
        if found.group(1):
            return key.replace("___", "-")
    return ''

def get_paysender(parts):
    """Resolve the paysender name for one split log row.

    parts[7] (RDNS) is checked against rdns_regexps first; only when that
    gives nothing is parts[4] (MsgID, with surrounding "<" / ">" stripped)
    checked against msgid_regexps.  Returns '' when neither matches.
    """
    by_rdns = search_paysender(rdns_regexps, parts[7])
    if by_rdns:
        return by_rdns
    msgid = parts[4].strip("<>")
    return search_paysender(msgid_regexps, msgid)

def extract_name(s):
    """Turn a rule name into an identifier-like key.

    Every "-" becomes "___", then the first run of word characters is
    returned.  If the string contains no word characters at all, the
    substituted string is returned unchanged.
    """
    escaped = s.replace("-", "___")
    first_word = re.search(r'\w+', escaped)
    if not first_word:
        return escaped
    return first_word.group(0)

log("Parsing started")

# Build the two lookup tables used by get_paysender() from the rule file
# named by the first command-line argument.  For every entry produced by
# parse_rule(): item [1] selects the table, item [0] is normalised with
# extract_name() to form the key, and item [2] is the regex text.
rdns_regexps, msgid_regexps = defaultdict(str), defaultdict(str)
for pattern in parse_rule(sys.argv[1]):
    kind = pattern[1]
    if kind == "rec-rdns":
        target = rdns_regexps
    elif kind == "message-id":
        target = msgid_regexps
    else:
        # Other rule kinds are not used here.
        continue
    key = extract_name(pattern[0])
    # The outer named group is group 1, which search_paysender() inspects.
    target[key] = re.compile("(?P<name_%s>(%s))" % (key, pattern[2]))

# Walk the lines returned by LogTail and accumulate counters per
# (message date, from-address, paysender) key.  `total` counts every line
# seen, `errors` counts lines whose processing raised.
stat, errors, total = defaultdict(Info), 0, 0
LOG_TAIL = LogTail(None, log=LOGFILE, id='paysenderstat')
for line in LOG_TAIL():
    total += 1
    try:
        parts = line.split("\t")
        fromaddr, spam = parts[1], parts[2]
        personal = "_PF" in parts[5]
        # First whitespace-separated token of field 3 is used as the date.
        msgtime = parts[3].split()[0]
        sender = get_paysender(parts)
        # Skip rows without a recognised sender or from-address, and rows
        # carrying the "1970-01-01" date.
        if not (sender and fromaddr) or msgtime == "1970-01-01":
            continue
        info = stat[(msgtime, fromaddr, sender)]
        if spam in ("H", "D"):
            info.ham += 1
            if personal:
                info.personal_ham += 1
        elif spam in ("S", "M"):
            info.spam += 1
            if personal:
                info.personal_spam += 1
    except Exception as e:
        log("Exception during log's row parsing: %s" % str(e), True)
        errors += 1
# Upload the aggregated counters: one upsert per (msgtime, fromaddr, sender)
# key, in sorted key order.  On a key conflict the stored counters are
# incremented by the new values instead of being replaced.
try:
    getPGCredentials()
    db, i = getPGdb(PG), 0
    cursor = db.cursor()
    for i, ((msgtime, fromaddr, sender), info) in enumerate(sorted(stat.iteritems())):
        try:
            # Dot-separated components are reversed and re-joined with "-"
            # ("a.b.c" -> "c-b-a"); a value without dots passes through as is.
            msgtime = "-".join(reversed(msgtime.split(".")))
            cursor.execute("""INSERT INTO psndr (msgtime, sender, fromaddr, spam, ham, personal_spam, personal_ham) VALUES(%s, %s, %s, %s, %s, %s, %s)
                            ON CONFLICT (msgtime, sender, fromaddr) DO UPDATE SET spam = psndr.spam + EXCLUDED.spam, ham = psndr.ham + EXCLUDED.ham, personal_spam = psndr.personal_spam + EXCLUDED.personal_spam, personal_ham = psndr.personal_ham + EXCLUDED.personal_ham""",
                            (msgtime, sender, fromaddr, info.spam, info.ham, info.personal_spam, info.personal_ham))
            # Commit in batches: after the first row and then every 100th.
            if i % 100 == 0:
                db.commit()
        except psycopg2.DatabaseError as e:
            log("DB psycopg2.DatabaseError (%s): %s.%s" % (e.pgcode, str(e), get_traceback()))
            # A failed statement leaves the transaction aborted; without a
            # rollback every following execute() would fail as well and the
            # in-loop commit would never be reached.  Rows executed since the
            # last commit are discarded together with the failed one.
            # NOTE(review): assumes getPGdb() returns a psycopg2 connection
            # (it is already used via cursor()/commit()) -- confirm.
            db.rollback()
        except Exception as e:
            log("Error while uploading to db: %s" % str(e), True)
    db.commit()
except Exception as e:
    # Reached for failures outside the per-row handlers (credentials,
    # connecting, final commit, rollback), so report the upload stage.
    log("Exception during uploading to db: %s" % str(e), True)

log("Parsing done. Total: %d. Errors: %d" % (total, errors))
