#!/usr/bin/env python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, re, ConfigParser
from collections import defaultdict
from urllib import unquote
from socket import gethostname
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from time import time, mktime, strptime
from so_log_tail import LogTail
from pgdb import getPGCredentials, getPGdb

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Log tailer handle; stays None until the main section of the script
# replaces it with a LogTail instance.
LOG_TAIL = None

# Month abbreviation -> month number, used when parsing record timestamps.
MONTH = dict(zip(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
    range(1, 13)))

# The config path is the first command-line argument when one is given.
# NOTE(review): the fallback looks like a template whose PARSER_DIR part is
# substituted at install time - confirm.
if len(sys.argv) > 1:
    cfg_file = sys.argv[1]
else:
    cfg_file = 'PARSER_DIR/complaints-accesschecklog-parser.ini'

cfg = ConfigParser.SafeConfigParser()
# Keep option names exactly as written (the parser lower-cases them by default).
cfg.optionxform = str
cfg.read(cfg_file)

# Snapshot of the three config sections used by the script, each as a plain
# dict of option -> string value.
q = dict((section, dict(cfg.items(section)))
         for section in ('general', 'db', 'log-tail'))

def initData():
    """Rebind the global `data` to an empty accumulator and load DB credentials.

    The accumulator is a five-level auto-vivifying mapping whose innermost
    level maps counter names to ints (missing counters read as 0).  The script
    indexes it as data[ds][source][footype][seen][counter].

    Also calls getPGCredentials() with the 'db' config section.
    """
    global data

    def _counters():
        return defaultdict(int)

    def _level4():
        return defaultdict(_counters)

    def _level3():
        return defaultdict(_level4)

    def _level2():
        return defaultdict(_level3)

    data = defaultdict(_level2)
    getPGCredentials(q['db'])

def _log_db_error(e):
    """Report a failed DB operation through LOG_TAIL.

    Uses the `pgcode` / `pgerror` attributes when the exception carries both,
    otherwise falls back to the exception's string form.
    """
    if hasattr(e, "pgcode") and hasattr(e, "pgerror"):
        LOG_TAIL.error("DB exception (code=%s): %s" % (e.pgcode, e.pgerror), True)
    else:
        LOG_TAIL.error("Exception while DB operations in send_stats2db: %s" % str(e), True)


def send_stats2db(data):
    """Upsert every accumulated counter row into the `compl_stat` table.

    `data` is a nested mapping date -> source -> footype -> seen -> counters,
    where `counters` is indexed by 'total', 'empty_ip', 'with_mail',
    'min_time', 'max_time', 'accum_time' and the 'http_*' names.  The current
    host name is stored with every row and the footype is URL-unquoted first.

    Each row gets up to three attempts; after a failed attempt the error is
    logged and a new connection/cursor is obtained from getPGdb().  A row that
    fails all three attempts is dropped.  Nothing is raised to the caller for
    ordinary exceptions: they are logged through LOG_TAIL instead.
    """
    host = gethostname()
    try:
        db = getPGdb(q['db'])
        pg_cursor = db.cursor()
        for (ds, sources_dict) in data.iteritems():
            for (source, types) in sources_dict.iteritems():
                for (footype, seens) in types.iteritems():
                    ft = unquote(footype)
                    for (seen, c) in seens.iteritems():
                        # The row key is bound twice: once for VALUES and once
                        # for the WHERE of the DO UPDATE clause.
                        row_key = (ds, host, source, ft, seen)
                        params = row_key + (c['total'], c['empty_ip'], c['with_mail'], c['min_time'], c['max_time'], c['accum_time'], c['http_200'], c['http_4xx'], c['http_499'], c['http_5xx'], c['http_other']) + row_key
                        for _attempt in range(3):
                            try:
                                pg_cursor.execute("""INSERT INTO compl_stat (date, host, source, footype, seen, total, empty_ip, with_mail,
                                            min_time, max_time, accum_time, http_200, http_4xx, http_499, http_5xx, http_other)
                                        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                                        ON CONFLICT (date, host, source, footype, seen)
                                        DO UPDATE SET total = compl_stat.total + EXCLUDED.total, empty_ip = compl_stat.empty_ip + EXCLUDED.empty_ip,
                                            with_mail = compl_stat.with_mail + EXCLUDED.with_mail, min_time = LEAST(compl_stat.min_time, EXCLUDED.min_time),
                                            max_time = GREATEST(compl_stat.max_time, EXCLUDED.max_time), accum_time = compl_stat.accum_time + EXCLUDED.accum_time,
                                            http_200 = compl_stat.http_200 + EXCLUDED.http_200, http_4xx = compl_stat.http_4xx + EXCLUDED.http_4xx,
                                            http_499 = compl_stat.http_499 + EXCLUDED.http_499, http_5xx = compl_stat.http_5xx + EXCLUDED.http_5xx,
                                            http_other = compl_stat.http_other + EXCLUDED.http_other WHERE compl_stat.date = %s AND compl_stat.host = %s AND
                                            compl_stat.source = %s AND compl_stat.footype = %s AND compl_stat.seen = %s""",
                                        params)
                                db.commit()
                                break
                            except Exception as e:
                                _log_db_error(e)
                                # Retry on a fresh connection and cursor.
                                db = getPGdb(q['db'])
                                pg_cursor = db.cursor()
        pg_cursor.close()
    except Exception as e:
        _log_db_error(e)

def gotSignal(signum, frame):
    """Signal handler: flush any accumulated statistics, then exit with status 1.

    `signum` and `frame` are the standard handler arguments; neither is used.
    """
    # The handlers are installed before initData() first binds `data`, so the
    # name may not exist yet when a signal arrives very early; treat that the
    # same as "nothing to flush" instead of failing with NameError.
    pending = globals().get('data')
    if pending:
        send_stats2db(pending)
    sys.exit(1)

# Child-termination signals are ignored outright.
signal(SIGCHLD, SIG_IGN)
# Every other handled signal goes through gotSignal.
for _signum in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_signum, gotSignal)

def _to_int(text):
    """Return int(text), or None when `text` is not a valid integer literal."""
    try:
        return int(text)
    except ValueError:
        return None


def _parse_record(record):
    """Extract the statistics-relevant fields from one log record.

    The record is split on whitespace.  Field 0 is expected to look like
    "[DD/Mon/YYYY:HH:MM:SS", field 4 is compared against '"POST', field 5 is
    the request target and field 7 is treated as the HTTP status.

    Returns the tuple (ds, source, footype, seen, ip, mail, dt, status) where
    `ds` is the record date as 'YYYY-MM-DD' and `dt` is the non-negative
    difference in seconds between the record time and the timestamp carried
    in the request (0 when the request carries none).

    Raises IndexError, KeyError, ValueError or OverflowError when the record
    does not have the expected layout.
    """
    sf = record.split()
    source, footype, ip, mail, seen, t0 = '', '', '', 0, '', 0

    # Date part: everything between the leading '[' and the first ':'.
    da = sf[0][1:sf[0].find(':', 10)].split('/')
    ds = "%d-%02d-%02d" % (int(da[2]), int(MONTH[da[1]]), int(da[0]))
    # Time part: the three ':'-separated items after the date.
    da = sf[0].split(':')[1:]
    # mktime() interprets the timestamp in the local timezone.
    t1 = int(mktime(strptime("%s %s:%s:%s" % (ds, da[0], da[1], da[2]), '%Y-%m-%d %H:%M:%S')))

    if sf[5].strip('/') == 'fbl-in':
        source, seen, mail = 'fblin', 'seen_yes', 1
    else:
        is_post = sf[4] == '"POST'
        # The first 7 characters of the request target are skipped
        # (presumably a fixed path prefix ending in '?' - TODO confirm).
        for param in sf[5][7:].split('&'):
            # partition() yields an empty value for a parameter without '=',
            # where indexing the result of split('=') would raise IndexError.
            key, _, value = param.partition('=')
            if key == 'source':
                source = value
            elif key == 'type':
                footype = value
            elif key == 'ip':
                ip = value
            elif key == 'seen':
                seen = 'seen_' + value
            elif key.startswith('otrs'):
                source = 'otrs'
            elif key == 'mail' or is_post:
                # NOTE(review): as this is an elif chain, on a POST request
                # every parameter reaching this point lands here, so
                # 'unixtime' / 'data' are never examined for POSTs.
                # Behaviour kept as-is; confirm it is intended.
                mail = 1
            elif key == 'unixtime':
                # Keep the largest 'unixtime' seen; ignore non-numeric values.
                stamp = _to_int(value) if value else None
                if stamp is not None and stamp > t0:
                    t0 = stamp
            elif key == 'data':
                # 'data' is only a fallback while no timestamp is known yet.
                if value and not t0:
                    stamp = _to_int(value)
                    if stamp is not None:
                        t0 = stamp

    if not t0:
        t0 = t1
    dt = (t1 - t0) if t1 > t0 else 0
    if source.startswith('fastsrv') and footype.startswith('flag'):
        footype = 'flag'
    return ds, source, footype, seen, ip, mail, dt, sf[7]


def _account(ds, source, footype, seen, ip, mail, dt, status):
    """Add one parsed record to the global `data` accumulator."""
    counters = data[ds][source][footype][seen]
    counters['total'] += 1
    if mail:
        counters['with_mail'] += 1
    if not ip:
        counters['empty_ip'] += 1
    if status.isdigit():
        code = int(status)
        if status in ('200', '499'):
            http_code = status
        elif 400 <= code < 499:
            http_code = '4xx'
        elif 500 <= code < 600:
            http_code = '5xx'
        else:
            http_code = 'other'
        counters['http_' + http_code] += 1
    # min/max are created on first use so that the defaultdict's implicit 0
    # never takes part in the comparison.
    if 'min_time' not in counters or counters['min_time'] > dt:
        counters['min_time'] = dt
    if 'max_time' not in counters or counters['max_time'] < dt:
        counters['max_time'] = dt
    counters['accum_time'] += dt


initData()
LOG_TAIL = LogTail(cfg_file)
lastSendTime, sendTimePeriod = time(), int(q['general'].get('sendTimePeriod', 60))
for record in LOG_TAIL():
    if not record:
        continue
    try:
        fields = _parse_record(record)
    except (IndexError, KeyError, ValueError, OverflowError) as e:
        # One unparseable line must not take the whole parser down (and lose
        # the counters not yet sent); report it and move on.
        LOG_TAIL.error("Skipping malformed record (%s: %s): %r" % (type(e).__name__, e, record), True)
    else:
        _account(*fields)
    if time() - lastSendTime > sendTimePeriod:
        # initData() rebinds `data` to a fresh accumulator rather than
        # clearing it, so the copy keeps the counters gathered so far.
        data2 = data.copy()
        initData()
        send_stats2db(data2)
        lastSendTime = time()

# The record stream ended: send whatever is still accumulated.
send_stats2db(data)
