#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, errno, json, re, ConfigParser
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from bson.int64 import Int64
from subprocess import Popen, PIPE
from time import time, strptime, strftime, mktime, localtime, sleep
from collections import defaultdict, OrderedDict
from socket import getfqdn
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from common import getRoute
from so_log_tail import LogTail
from mongodb import loadMongoDbCredentials, getMongoDB

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Module-level state shared by the functions below.  A ``global`` statement
# has no effect at module scope, so the names are simply assigned here.

# Year captured once at start-up.
YEAR = strftime("%Y")
# Replaced by a LogTail instance further down, before the main loop starts.
LOG_TAIL = None
# Default MongoDB connection settings; db/hosts/port may be overridden from
# the [db] section of the configuration file.
MONGO = {
    'db':      'rules',
    'hosts':   'vla-hwmeehtmq450wvke.db.yandex.net,sas-agixxin7fbr78u0o.db.yandex.net,vlx-g9q00jkxh959zk6m.db.yandex.net',
    'port':    27018,
    'user':    'solog',
    'timeout': 60000
}
loadMongoDbCredentials(MONGO)

cfg = ConfigParser.SafeConfigParser()
# Keep option names case-sensitive (ConfigParser lower-cases them by default).
cfg.optionxform = str
# Configuration file: first command-line argument, else the built-in default path.
if len(sys.argv) > 1:
    cfg_file = sys.argv[1]
else:
    cfg_file = 'PARSER_DIR/so-deliverylog-parser.ini'
cfg.read(cfg_file)
# Route name: taken from getRoute(); when that yields an empty string, fall
# back to the second command-line argument (default 'in').
route = getRoute().capitalize()
if not route:
    route = (sys.argv[2] if len(sys.argv) > 2 else 'in').capitalize()
# Map routes containing a known keyword (case-insensitive) onto a fixed name;
# the first matching keyword wins.  The match object is tested for truthiness
# directly: comparing it with ``-1`` only worked through Python 2's arbitrary
# ordering of unrelated types and raises TypeError on Python 3.
if re.search(r'search', route, re.I):
    route = "Sosearch"
elif re.search(r'frodo', route, re.I):
    route = "Sopassport"
elif re.search(r'checkform', route, re.I):
    route = "Socheckform"
elif re.search(r'checkmessage', route, re.I):
    route = "Socheckmessages"
elif re.search(r'statip', route, re.I):
    route = "Sostatip"
# Log file to tail: third command-line argument, else the "log" option of the
# [log-tail] section, else empty.
if len(sys.argv) > 3:
    LOG = sys.argv[3]
elif cfg.has_option('log-tail', 'log'):
    LOG = cfg.get('log-tail', 'log')
else:
    LOG = ''
if not (LOG and os.path.exists(LOG)):
    # No usable path given: only the in/out/corp routes have a built-in
    # default location; any other route is a fatal configuration error.
    if route.lower() in ['in', 'out', 'corp']:
        LOG = "/u0/statlog/so_{0}/so_{0}.log".format(route.lower())
    else:
        print >>sys.stderr, "Unable to locate delivery.log: path '{}' does not exist for route '{}' ! Input args: {}.".format(LOG, route, sys.argv)
        sys.exit(1)
# Optional identifier passed through to LogTail (fourth command-line argument).
if len(sys.argv) > 4:
    ID = sys.argv[4]
else:
    ID = ''
# Plain-dict snapshot of the three configuration sections used by this script.
q = dict((section, dict(cfg.items(section))) for section in ('general', 'db', 'log-tail'))
# Non-empty values from the [db] section override the built-in MongoDB defaults.
if q['db'].get('name'):
    MONGO['db'] = q['db']['name']
if q['db'].get('hosts'):
    MONGO['hosts'] = q['db']['hosts']
if q['db'].get('port'):
    MONGO['port'] = int(q['db']['port'])
# Captures the "<Mon> <day> <HH:MM:SS>" timestamp at the start of a record,
# optionally preceded by the configured separator (inserted into the pattern
# verbatim, i.e. it is treated as a regular-expression fragment).
MESS_RE = re.compile(r'^(?:{0})?\s*([A-Za-z]+\s+\d+\s+\d\d\:\d\d\:\d\d)'.format(q['log-tail'].get('sep', 'mess:')), re.M)

def initData():
    """Reset the global ``data`` accumulator to an empty three-level counter.

    The structure is ``data[key1][key2][key3] -> Int64``; every level is
    created on first access and leaf counters start at ``Int64()``.
    """
    global data

    def _leaf_counters():
        # Innermost level: counter name -> Int64 value.
        return defaultdict(Int64)

    def _middle_level():
        # Middle level: key -> innermost counter mapping.
        return defaultdict(_leaf_counters)

    data = defaultdict(_middle_level)

def send_rules2db(data):
    """Write accumulated rule counters to MongoDB.

    ``data`` maps ``timestamp -> rule -> counter name -> value``.  Each
    (timestamp, rule) pair is upserted into ``detailed_Rules_<route>`` with an
    ``$inc`` of its counters; the same counters are then summed per local
    calendar day and upserted into ``Rules_<route>``.

    Nothing is raised: every failure is reported through ``LOG_TAIL.error``
    and the remaining updates are still attempted.
    """
    global LOG_TAIL, MONGO, route
    try:
        db = getMongoDB(MONGO)
        # day string -> rule -> counter name -> summed value
        daily_totals = defaultdict(lambda: defaultdict(lambda: defaultdict(Int64)))
        for ts, per_rule in data.iteritems():
            day = strftime("%Y-%m-%d", localtime(ts))
            for rule, counters in per_rule.iteritems():
                try:
                    db['detailed_Rules_%s' % route].update_one(
                        {'rule': rule, 'time': ts},
                        {'$inc': dict(counters)},
                        upsert=True)
                except Exception as e:
                    LOG_TAIL.error("Exception while DB operations with 'detailed_Rules_%s' collection in send_rules2db (ts=%s, rule=%s): %s" % (route, ts, rule, str(e)), True)
                # The daily sum is accumulated even when the detailed update failed.
                for name, amount in counters.iteritems():
                    daily_totals[day][rule][name] += amount
        for day, per_rule in daily_totals.iteritems():
            for rule, counters in per_rule.iteritems():
                try:
                    db['Rules_%s' % route].update_one(
                        {'rule': rule, 'date': day},
                        {'$inc': dict(counters)},
                        upsert=True)
                except Exception as e:
                    LOG_TAIL.error("Exception while DB operations with 'Rules_%s' collection in send_rules2db (date=%s, rule=%s): %s" % (route, day, rule, str(e)), True)
    except Exception as e:
        # Covers connection set-up and anything unexpected outside the per-rule handlers.
        LOG_TAIL.error("Exception while DB operations in send_rules2db: %s" % str(e), True)

def gotSignal(signum, frame):
    """Signal handler: flush any pending counters, then exit with status 1.

    ``signum`` and ``frame`` are the standard handler arguments and are unused.
    """
    global LOG_TAIL, data
    pending = data
    if pending:
        # Something has been accumulated since the last flush: send it first.
        send_rules2db(pending)
    sys.exit(1)

def gotUsr1Signal(signum, frame):
    """SIGUSR1 handler: a forked sender child reports that it has finished.

    The main loop forks a child for each flush (when more than one process is
    allowed) and the child sends SIGUSR1 to this process once its
    send_rules2db() call has returned.  The handler releases one slot in
    ``procCnt`` (never going below zero) and drops this process's reference to
    the handed-over snapshot ``data2``.

    The handler must return normally: exiting here would terminate the main
    process as soon as the first child completed, discarding the counters
    accumulated since that flush and making the bookkeeping above pointless.

    ``signum`` and ``frame`` are the standard handler arguments and are unused.
    """
    global procCnt, data2
    if procCnt > 0:
        procCnt -= 1
    data2 = ()

def prepareRecord(record):
    """Extract timestamp, rule list and R-marker from one log record.

    The record is parsed as JSON when the ``format`` option of the ``general``
    configuration section equals ``json`` (fields ``mess``, ``r_sp``, ``r_dl``,
    ``r_nl``); otherwise it is treated as text and the ``r_sp:``/``r_dl:``/
    ``r_nl:`` lines are collected with a regular expression.

    Returns a tuple ``(valid, t, rules, rc, banst)``:
      valid -- True when the timestamp was parsed and either a non-zero
               trailing ``R<digits>`` marker or a ``BANST_<digits>`` marker
               was found in the rules
      t     -- record time in epoch seconds; the current time when no
               timestamp was parsed or it lies a day or more in the future
      rules -- rule headers joined with single spaces (cut just before the
               BANST marker when one is present)
      rc    -- integer taken from the ``R<digits>`` marker, '' when absent
      banst -- the ``BANST_<digits>`` marker, '' when absent

    Problems are reported through ``LOG_TAIL.error``; nothing is raised for a
    malformed record.
    """
    global q, LOG_TAIL
    valid, t, rules, rc, banst = False, time(), '', '', ''
    mess_header = record
    if q["general"].get("format") == "json":
        # Named "fields" so the module-level "data" accumulator is not shadowed.
        fields = {}
        try:
            fields = OrderedDict(json.loads(record.decode("koi8-r")))
        except Exception as e:
            LOG_TAIL.error("Failed to parse JSON: %s. InputData: %s." % (str(e), record), True)
        mess_header = fields["mess"] if "mess" in fields else ""
        rules = ' '.join(fields.get("r_{}".format(k), '') for k in ('sp', 'dl', 'nl'))
    else:
        rules = ' '.join(re.findall(r'^r_(?:sp|dl|nl): (.+)', record, re.M))
    m = MESS_RE.match(mess_header)
    if m:
        try:
            # The log timestamp carries no year.  Resolve it at parse time
            # rather than from a value captured at start-up, so a process that
            # keeps running across New Year does not date records a year back.
            current_year = strftime("%Y")
            t = int(mktime(strptime("{0} {1}".format(current_year, m.group(1)), "%Y %b %d %H:%M:%S")))
            # A timestamp a day or more ahead (e.g. a late-December record
            # read in January) is replaced by the current time.
            if t >= time() + 86400:
                t = time()
            m = re.search(r',\s*R(\d+),\s*$', rules)
            if m:
                rc = int(m.group(1))
                if rc:
                    valid = True
            else:
                m = re.search(r' (BANST_\d+)(?:_M)?R(\d+),', rules)
                if m:
                    rc, banst = int(m.group(2)), m.group(1)
                    # Everything from the BANST marker onwards is dropped from the rule list.
                    rules = rules[:m.start()]
                    valid = True
                else:
                    LOG_TAIL.error("Error: wrong format of r_nl header in log record!")
        except Exception as e:
            LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
    return valid, t, rules, rc, banst

# SIGCHLD is ignored; the sender children forked by the main loop are never waited for.
signal(SIGCHLD, SIG_IGN)
# A sender child announces its completion with SIGUSR1.
signal(SIGUSR1, gotUsr1Signal)
# Every other handled signal flushes pending counters and exits.
for _signum in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_signum, gotSignal)
del _signum

initData()
LOG_TAIL = LogTail(cfg_file, log=LOG, id=ID)

# Flush bookkeeping: time of the last flush, minimum seconds between flushes,
# and the width in seconds of one statistics time bucket.
lastSendTime = time()
sendTimePeriod = int(q['general'].get('sendTimePeriod', 60))
timeStep = int(q['general'].get('timeStep', 600))
# Process counter (starts at 1) and its upper limit from the configuration.
procCnt = 1
maxProcesses = int(q['general'].get('maxProcesses', 1))
# Main loop: consume records produced by LOG_TAIL(), accumulate per-rule
# counters in the global ``data`` and flush them to MongoDB periodically.
for record in LOG_TAIL():
    valid, t, rules, rc, banst = prepareRecord(record)
    if not valid:
        LOG_TAIL.error("Invalid record: '%s'" % record)
        continue
    # Round the record time down to a whole minute, then down to the start of
    # its timeStep-second bucket; t0 is the first-level key in ``data``.
    t = int(t / 60) * 60
    t0 = int(t / timeStep) * timeStep
    # Records carrying the PERSONAL_CORRECT rule get their R-value remapped
    # (4 -> 127, 1 -> 8) before counting.
    if re.search(r'\b(PERSONAL_CORRECT)\b', rules):
        if rc == 4:
            rc = 127
        elif rc == 1:
            rc = 8
    # The BANST marker is counted like a rule of its own.
    if banst:
        data[t0][banst]['R%s' % rc] += 1
    # Count every comma-separated item of the rule list under counter "R<rc>";
    # only the leading run of word characters is used as the rule name.
    for rule in re.split(r',\s*', rules):
        if not rule:
            continue
        m = re.match(r'^(\w+)', rule)
        if m:
            data[t0][m.group(1)]["R%s" % rc] += 1
        else:
            LOG_TAIL.error("Invalid rule: '%s' in log's fragment: %s" % (rule, record))
    # Additionally bump counter 'R256' for every rule of the record when it
    # contains BLOCKRULE (routes Sosearch/Socheckform) or MALICIOUS (route
    # Sopassport).  Note: ``and`` binds tighter than ``or`` in the condition.
    if (route == 'Sosearch' or route == 'Socheckform') and re.search(r',\s*BLOCKRULE,\s*', rules) or route == 'Sopassport' and re.search(r',\s*MALICIOUS,\s*', rules):
        for rule in re.split(r',\s*', rules):
            m = re.match(r'^(\w+)', rule)
            if m:
                data[t0][m.group(1)]['R256'] += 1
    # Flush when more than sendTimePeriod seconds have passed since the last
    # flush and the process counter is still within its limit.
    if time() - lastSendTime > sendTimePeriod and procCnt <= maxProcesses:
        # Hand the current accumulator over as a snapshot (shallow copy) and
        # start a fresh one for the records that follow.
        data2 = data.copy()
        initData()
        if maxProcesses <= 1:
            # Single-process mode: send synchronously in this process.
            send_rules2db(data2)
        else:
            # Multi-process mode: send from a forked child.
            pid = None
            while True:
                try:
                    pid = os.fork()
                    if not pid:
                        # Child: send the snapshot, notify the parent with
                        # SIGUSR1 (handled there by gotUsr1Signal), then leave
                        # via os._exit() without running normal exit handling.
                        send_rules2db(data2)
                        os.kill(os.getppid(), SIGUSR1)
                        os._exit(0)
                    # Parent: account for the child that was just started.
                    procCnt += 1
                    break
                except OSError, e:
                    if e.errno == errno.EAGAIN:
                        # fork() failed with EAGAIN: wait 3 seconds and retry.
                        sleep(3)
                    else:
                        # Any other fork failure is logged; the snapshot is not sent.
                        LOG_TAIL.error("Unable to spawn child process for sending rules statistics: %s" % str(e), True)
                        break
        lastSendTime = time()

# The record source is exhausted: flush whatever is still accumulated.
send_rules2db(data)

