#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import ConfigParser
import os, os.path, sys, errno, re
from collections import defaultdict
from datetime import date
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from time import time, mktime, strftime, strptime
from time import sleep

from bson.int64 import Int64

from so_log_tail import LogTail
from mongodb import loadMongoDbCredentials, getMongoDB

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Module-level state shared with the functions below: q, route, data, data2,
# LOG_TAIL, MONGO, procCnt. (A `global` statement at module scope has no
# effect, so none is used here.)

# Year captured once at start-up.
YEAR = strftime("%Y")
# Replaced by a LogTail instance once the script body starts.
LOG_TAIL = None
# Connection parameters handed to loadMongoDbCredentials() / getMongoDB().
MONGO = {
    'db':      'rules',
    'hosts':   'sas-8bydc7gbz3h070ke.db.yandex.net,vla-hwmeehtmq450wvke.db.yandex.net,man-t5y0rvgx2wm1mer4.db.yandex.net',
    'port':    27018,
    'user':    'solog',
    'timeout': 70000
}
loadMongoDbCredentials(MONGO)

# Configuration: the path is the first command-line argument, if given.
cfg = ConfigParser.SafeConfigParser()
# Keep option names case-sensitive (the default optionxform lower-cases them).
cfg.optionxform = str
cfg_file = sys.argv[1] if len(sys.argv) > 1 else 'PARSER_DIR/abuse-deliverylog-parser.ini'
# read() silently skips files it cannot open and returns the list of files it
# did parse; fail with a clear message instead of a later NoSectionError.
if not cfg.read(cfg_file):
    sys.exit("Unable to read configuration file: %s" % cfg_file)
# Plain-dict snapshot of the three sections this script consults.
q = {
    'general': dict(cfg.items('general')),
    'db': dict(cfg.items('db')),
    'log-tail': dict(cfg.items('log-tail')),
}

ERRORLOG = "ERRORLOG_DIR/abuse-deliverylog-parser.log"
LOG_FILE = 'LOG_DIR/abuse.delivery.log'
MONTH = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}
# Record header: optional separator prefix followed by "Mon DD HH:MM:SS".
# NOTE(review): the configured 'sep' is interpolated into the pattern without
# re.escape(), so it is interpreted as a regular expression - confirm intent.
MESS_RE = re.compile(r'^(?:{0})?\s*([A-Za-z]+\s+\d+\s+\d\d\:\d\d\:\d\d)'.format(q['log-tail'].get('sep', 'mess:')), re.M)
# Per-record fields, each on a line of its own inside the record.
ROUTE_RE = re.compile(r'^compl_route:\s+([a-z]+)$', re.M)
TYPE_RE = re.compile(r'^compl_type\:\s+(\w+)$', re.M)
DRES_RE = re.compile(r'^spam: (\w+)', re.M)

def initData():
    """Rebind the global accumulators to fresh, empty containers.

    ``data`` is a five-level auto-vivifying mapping whose leaves are created
    with ``Int64()``; the script indexes it as
    ``data[route][kind][bucket][rule][counter]``.
    ``plotnik_data`` is a two-level mapping with ``Int64()`` leaves, indexed
    by the script as ``plotnik_data[timestamp][name]``.
    """
    global data, plotnik_data

    # Factories are listed from the innermost level outwards.
    def _by_counter():
        return defaultdict(Int64)

    def _by_rule():
        return defaultdict(_by_counter)

    def _by_bucket():
        return defaultdict(_by_rule)

    def _by_kind():
        return defaultdict(_by_bucket)

    data = defaultdict(_by_kind)
    plotnik_data = defaultdict(_by_counter)

def send_rules2db(data):
    """Add the accumulated per-rule counters to MongoDB.

    ``data`` maps a route name to a mapping with two entries:

    * ``'detailed'``: ``{time: {rule: {counter: value}}}``, written to the
      collection ``detailed_Rules_<route>`` under the key (rule, time);
    * ``'daily'``: ``{date: {rule: {counter: value}}}``, written to the
      collection ``Rules_<route>`` under the key (rule, date).

    Every rule's counters are applied with one ``$inc`` upsert.

    Nothing is returned. Any exception raised while talking to the database
    is reported through ``LOG_TAIL.error`` and not re-raised, so the rest of
    the batch is dropped in that case.
    """
    try:
        db = getMongoDB(MONGO)
        # The loop variable is deliberately local: it must not overwrite the
        # module-level `route` that the parsing loop maintains.
        for route_name in data.keys():
            per_route = data[route_name]
            detailed = db['detailed_Rules_%s' % route_name]
            for (t, rule_stat) in per_route['detailed'].iteritems():
                for (rule, stat) in rule_stat.iteritems():
                    detailed.update_one({'rule': rule, 'time': t}, {'$inc': dict(stat)}, upsert=True)
            daily = db['Rules_%s' % route_name]
            for (d, rules_stat) in per_route['daily'].iteritems():
                for (rule, stat) in rules_stat.iteritems():
                    daily.update_one({'rule': rule, 'date': d}, {'$inc': dict(stat)}, upsert=True)
    except Exception as e:
        LOG_TAIL.error("Exception while DB operations in send_rules2db: %s" % str(e), True)

def send_plotnik(stats):
    """Print the accumulated counters to standard output.

    ``stats`` maps a timestamp to ``{name: count}``. One line of the form
    ``<name> <count as float> <timestamp>`` is produced per counter, ordered
    by timestamp and then by name. The whole batch is emitted with a single
    print, so it is followed by one empty line; an empty ``stats`` yields
    just that empty line.
    """
    lines = []
    for timestamp, info in sorted(stats.items()):
        for name, count in sorted(info.items()):
            lines.append("%s %f %d\n" % (name, float(count), timestamp))
    # A single parenthesised argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print("".join(lines))

def gotSignal(signum, frame):
    """Signal handler: flush whatever has been accumulated, then exit.

    Non-empty ``data`` is passed to send_rules2db() and non-empty
    ``plotnik_data`` to send_plotnik(); the process then terminates with
    exit status 1. Both arguments are ignored.
    """
    # An empty (or unset-to-None) accumulator is falsy, so there is nothing
    # to flush for it.
    if data:
        send_rules2db(data)
    if plotnik_data:
        send_plotnik(plotnik_data)
    sys.exit(1)

def gotUsr1Signal(signum, frame):
    """Signal handler: release one process slot, drop ``data2`` and exit.

    Decrements the global ``procCnt`` unless it is already zero or negative,
    rebinds the global ``data2`` to an empty tuple and terminates with exit
    status 1. Both arguments are ignored.

    NOTE(review): no ``signal(SIGUSR1, ...)`` call installing this handler is
    visible in this file, and the handler ends the process it runs in -
    confirm both are intended.
    """
    global procCnt, data2
    remaining = procCnt
    if remaining > 0:
        remaining -= 1
    procCnt = remaining
    data2 = ()
    raise SystemExit(1)

# Child-exit notifications are ignored; every other signal handled here
# flushes the accumulated statistics and terminates (see gotSignal).
signal(SIGCHLD, SIG_IGN)
signal(SIGPIPE, gotSignal)
signal(SIGQUIT, gotSignal)
signal(SIGABRT, gotSignal)
signal(SIGTERM, gotSignal)
signal(SIGINT,  gotSignal)
signal(SIGHUP,  gotSignal)
initData()
LOG_TAIL = LogTail(cfg_file)
lastSendTime = time()
# Minimum number of seconds between two flushes.
sendTimePeriod = int(q['general'].get('sendTimePeriod', 60))
# Width, in seconds, of one bucket of the 'detailed' statistics.
timeStep = int(q['general'].get('timeStep', 600))
procCnt, maxProcesses = 1, int(q['general'].get('maxProcesses', 1))
curDate = date.today()

# Patterns used for every record, compiled once.
_RULE_LINES_RE = re.compile(r'^r_(?:sp|dl|nl): (.+)', re.M)
_RULE_SPLIT_RE = re.compile(r',\s*')
_RULE_NAME_RE = re.compile(r'^(\w+)')
_PERSONAL_RE = re.compile(r'\bPERSONAL_CORRECT\b')

for record in LOG_TAIL():
    valid, t, d = False, time(), curDate
    try:
        m = MESS_RE.match(record)
        if m:
            try:
                # The log timestamp carries no year. Take the current year at
                # parse time rather than the one captured at start-up, so a
                # long-running process keeps dating records correctly after
                # New Year.
                stamp = "{0} {1}".format(strftime("%Y"), m.group(1))
                t = int(mktime(strptime(stamp, "%Y %b %d %H:%M:%S")))
                # A timestamp more than a day ahead is replaced by "now".
                if t >= time() + 86400:
                    t = time()
                d = date.fromtimestamp(t)
                valid = True
            except Exception as e:
                LOG_TAIL.error("Exception: %s" % str(e), True)
        elif record.strip():
            LOG_TAIL.error("Strange record: '%s'" % record)
    except Exception as e:
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
        # The record could not even be inspected; do not aggregate it.
        continue
    if not valid:
        continue

    # t: start of the minute; t0: start of the enclosing timeStep bucket.
    t = int(t / 60) * 60
    t0 = int(t / timeStep) * timeStep
    # Rules may be spread over several r_sp/r_dl/r_nl lines. Join them with
    # the list separator so the last rule of one line is not glued to the
    # first rule of the next; empty items are skipped below.
    rules = ', '.join(_RULE_LINES_RE.findall(record))
    m = TYPE_RE.search(record)
    compl_type = m.group(1) if m else 'foo'
    m = DRES_RE.search(record)
    dres = m.group(1) if m else 'no'
    # Skip complaints that merely agree with the recorded verdict.
    if (dres == 'yes' and compl_type == 'foo') or (dres in ('no', 'dlv') and compl_type == 'antifoo'):
        continue
    m = ROUTE_RE.search(record)
    route = (m.group(1) if m else 'In').capitalize()
    personal = _PERSONAL_RE.search(rules) is not None
    counter = 'cmpl_ham' if compl_type == 'antifoo' else 'cmpl_spam'
    day = d.isoformat()
    for rule in _RULE_SPLIT_RE.split(rules):
        if not rule:
            continue
        m = _RULE_NAME_RE.match(rule)
        if not m:
            continue
        name = m.group(1)
        if not personal:
            data[route]['detailed'][t0][name][counter + '_nopf'] += 1
            data[route]['daily'][day][name][counter + '_nopf'] += 1
            # NOTE(review): incremented once per rule, not once per record -
            # confirm that is the intended unit.
            plotnik_data[t][compl_type] += 1
        data[route]['detailed'][t0][name][counter] += 1
        data[route]['daily'][day][name][counter] += 1

    if time() - lastSendTime > sendTimePeriod and procCnt <= maxProcesses:
        # Hand the current batch over and start accumulating a new one.
        data2 = data.copy()
        plotnik_data2 = plotnik_data.copy()
        initData()
        if maxProcesses <= 1:
            send_rules2db(data2)
        else:
            pid = None
            while True:
                try:
                    pid = os.fork()
                    if not pid:
                        # Child: whatever happens, never fall back into the
                        # parsing loop (os.kill can raise OSError, which the
                        # handler below would otherwise treat as a failed
                        # fork and then carry on in the child).
                        try:
                            send_rules2db(data2)
                            # NOTE(review): no SIGUSR1 handler is installed in
                            # the visible code; with the default disposition
                            # this signal terminates the parent - confirm.
                            os.kill(os.getppid(), SIGUSR1)
                        finally:
                            os._exit(0)
                    procCnt += 1
                    break
                except OSError as e:
                    if e.errno == errno.EAGAIN:
                        # Temporarily out of process slots: retry shortly.
                        sleep(3)
                    else:
                        LOG_TAIL.error("Unable to spawn child process for sending rules statistics: %s" % str(e), True)
                        break
        send_plotnik(plotnik_data2)
        lastSendTime = time()

# The tail ended: flush what is still pending.
send_rules2db(data)
send_plotnik(plotnik_data)
