#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, json, re, ConfigParser, subprocess
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import time, localtime, strptime, mktime
from collections import defaultdict
from socket import getfqdn
from signal import signal, SIG_IGN, SIGUSR1, SIGINT, SIGHUP, SIGCHLD, SIGPIPE, SIGQUIT, SIGABRT, SIGTERM
from subprocess import Popen, PIPE
from common import getRoute, getGroup4Host
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# NOTE: a `global` statement at module level has no effect; it only serves as
# a list of the module-wide names this script works with.
global cfg, q, LOG_TAIL, route, svc, prefx, data, data_so

# Whole-row pattern: timestamp, a quoted dotted name, an optional upper-case
# word and the trailing "<...>" tag list.
# NOTE(review): not referenced anywhere else in the visible part of this file.
LOG_ROW_RE = re.compile(r"^(\w+\s+\d+\s+\d+:\d+:\d+)[^']*'([^'\.]+\w\.[^']+)' ([A-Z]+)?.*\s(<.*)$", re.I)
# One "<NAME:payload>" or "<NAME,payload>" tag; groups are (NAME, payload).
FUNCS_RE = re.compile(r"<([A-Z0-9]+)[:,]([^\<\>]+)>", re.I)
# Payload of a tag. Groups as consumed by the main loop:
#   1 - function name, 2 - answer time (converted with float()),
#   3 - name of the counter to increment, 4 - single letter ('S' appends an
#   'S' to the service name), 5 - optional comma-separated error names
#   following a ';'.
PARAM_INFO_RE = re.compile(r"^([A-Z]+),([0-9\.]+),([^,<>]+),([A-Z]),[\w\-\.\:]+(?:,[\w\-\.\:]+)?(?:;([^\s<>]+))?$")
# Replaced by a LogTail instance once the signal handlers are installed.
LOG_TAIL = None
cfg = ConfigParser.SafeConfigParser()
# Keep option names exactly as written in the ini file (the parser's default
# optionxform lower-cases them).
cfg.optionxform = str
# Route: whatever getRoute() returns, else the second command-line argument,
# else 'in'.
route = getRoute()
if not route:
    route = sys.argv[2] if len(sys.argv) > 2 else 'in'
# Empty for the 'in' and 'out' routes, the upper-cased route otherwise.
# NOTE(review): prefx is not used anywhere else in the visible part of this file.
prefx = route.upper() if route != 'in' and route != 'out' else ''
# Strip a leading "mail_so_" (underscores optional) and a trailing "daemon"
# from the group name reported for this host.
group = re.sub(r'^mail_?so_?(.*?)(?:daemon)?$', r'\1', getGroup4Host(getfqdn()))
svc = "so_%s" % route
if group:
    svc = "so_%s" % group
else:
    group = route
# From here on `route` holds the upper-cased group (or route) name.
route = group.upper()
# Config file: first command-line argument, else the default ini path.
cfg_file = sys.argv[1] if len(sys.argv) > 1 else 'WORKING_DIR/so-monlog-parser.ini'
cfg.read(cfg_file)
# q is the payload that send_monstats() serialises to JSON: the [general]
# section of the ini file plus the keys filled in below.
q = dict(cfg.items('general'))
q['dataResult'] = {}
q['log'] = 'spamonlog'
q['logData'] = {}
q['machine'] = getfqdn()
# NOTE(review): `route` was upper-cased just above, so it can never equal the
# lower-case 'in' / 'out' and this prefix is therefore never empty (compare
# with how prefx is computed before the upper-casing) - confirm which
# behaviour is intended before changing it, as it affects emitted names.
q['prefix'] = route.upper() if route != 'in' and route != 'out' else ''

def initData():
    """(Re)create the module-level accumulators `data` and `data_so`.

    data    -- five levels of nested defaultdicts; the innermost level
               yields 0.0 (float) for unknown keys.
    data_so -- three levels of nested defaultdicts; the innermost level
               yields 0 (int) for unknown keys.

    Anything previously stored under these names is replaced.
    """
    global data, data_so

    def _nested(levels, leaf_type):
        # One level is a plain defaultdict(leaf_type); each further level
        # wraps it in a defaultdict that builds the level below on demand.
        if levels <= 1:
            return defaultdict(leaf_type)
        return defaultdict(lambda: _nested(levels - 1, leaf_type))

    data = _nested(5, float)
    data_so = _nested(3, int)

def send_monstats(data, data_so):
    """Flatten collected statistics into q['dataResult'] and pipe q to the sender script.

    data    -- nested mapping service -> route -> parameter -> time -> counters.
    data_so -- nested mapping route -> time -> counters.

    Each per-time counter dict is modified in place before a plain-dict copy
    of it is queued: it receives a 'time' key, and counters coming from
    `data` additionally receive 'Percent' (share of 'OK' in 'total', 0 when
    there is no 'OK') and, if 'AvgAnswerTime' is present, '_n_AvgAnswerTime'
    (a copy of 'total').

    The whole of q is then JSON-encoded and written to the stdin of
    so_logdata_sender.py. Anything the sender prints on stderr, and any
    exception raised while running it, is reported through LOG_TAIL.error.
    When the sender reports nothing on stderr the queued points are dropped,
    so they are not sent again by the next call; after a failure they stay
    queued and go out together with the next batch.

    Returns None.
    """
    results = q['dataResult']
    prefix = q['prefix']
    for r, by_time in data_so.items():
        series = results.setdefault("%sSO_%s" % (prefix, r), [])
        for t in list(by_time):
            point = by_time[t]
            point['time'] = t
            series.append(dict(point))
    for s in list(data):
        for r, by_param in data[s].items():
            for par in list(by_param):
                series = results.setdefault("%s%s_%s_%s" % (prefix, s, par, r), [])
                for t in list(by_param[par]):
                    point = by_param[par][t]
                    if 'AvgAnswerTime' in point:
                        point['_n_AvgAnswerTime'] = point['total']
                    # .get() does not create the key on a defaultdict.
                    if point.get('OK'):
                        point['Percent'] = point['OK'] * 100.0 / max(point['total'], 1)
                    else:
                        point['Percent'] = 0
                    point['time'] = t
                    series.append(dict(point))
    try:
        (output, err) = Popen(['WORKING_DIR/so_logdata_sender.py'], stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True).communicate(input=json.dumps(q))
        if err:
            LOG_TAIL.error("Error while sending parsed data: %s. Output: %s" % (err, output))
        else:
            # Delivered: start the next batch from scratch. Without this
            # every call would re-send all earlier points and the payload
            # would grow for as long as the process lives.
            q['dataResult'] = {}
    except Exception as e:
        LOG_TAIL.error("Exception while sending parsed data: %s" % str(e), True)

def gotSignal(signum, frame):
    """Signal handler: flush the collected statistics, then exit with status 1.

    signum, frame -- standard signal-handler arguments; neither is used.

    The accumulators are sent when at least one of them holds something.
    They are looked up through globals() because the handler can fire
    before initData() has created them.
    """
    pending = globals().get('data')
    pending_so = globals().get('data_so')
    # Both accumulators must exist, but only one of them needs to be
    # non-empty (an empty defaultdict is falsy).
    if pending is not None and pending_so is not None and (pending or pending_so):
        send_monstats(pending, pending_so)
    sys.exit(1)

# SIGCHLD is ignored; every other handled signal flushes the collected
# statistics through gotSignal() and terminates the process.
signal(SIGCHLD, SIG_IGN)
for _sig in (SIGPIPE, SIGQUIT, SIGABRT, SIGTERM, SIGINT, SIGHUP):
    signal(_sig, gotSignal)

# Exclusive upper bounds of the answer-time histogram buckets and the counter
# each bucket feeds; answer times of 200 and above go to '200AnswerTime'.
_ANSWER_TIME_BUCKETS = (
    (10, '0AnswerTime'),
    (20, '10AnswerTime'),
    (50, '20AnswerTime'),
    (100, '50AnswerTime'),
    (200, '100AnswerTime'),
)

initData()
LOG_TAIL = LogTail(cfg_file)
lastSendTime = time()
for record in LOG_TAIL():
    # string example
    #Mar  5 00:01:02.381 -  _2557_ 'so1h.mail.yandex.net' IN '366872778.201803050200@urbis.es'  <VERS,4.100,92067304><NSHN:NG,5,OK,F,2a02:6b8:0:3400::83f><NSHN:NP,2,OK,F,2a02:6b8:0:3400::83f><STV6:IS,5,OK,F,93.158.157.61><STATL:DL,9,OK,F,127.0.0.1><NPRSF:LS,16,OK,F,2a02:6b8:0:3400::3:63><FREEM: -><SENDR:TG,68,OK,F,2a02:6b8:0:3400::1:50><SENDR:TP,15,OK,F,2a02:6b8:0:3400::1:50><URLRP:UR,18,OK,F,2a02:6b8:0:3400::1:111><ABOOK:AB,21,OK,F,2a02:6b8:0:3400::1058><DISK: -><ABUSE:CG,4,OK,F,2a02:6b8:0:3400::82b><PERSB: -><DICT:DG,5,OK,F,2a02:6b8:0:3400::38b><DICT:DP,2,OK,F,2a02:6b8:0:3400::38b><ACTSH:GGC,34,OK,F,so-user-activity.n.yandex-team.ru><UAAS:US,15,OK,F,2a02:6b8:0:3400::2:48><LSA:GTT,15,OK,F,2a02:6b8:0:3400::1:50><SO,MSHIN,FNRLY,MALIC>
    valid, fields = False, []
    try:
        if re.match(r'^\W', record) and not re.search(r'\s+', record):
            LOG_TAIL.error('Unknown row format for row: %s' % record)
            continue
        fields = record.split()
        if len(fields) > 8:
            try:
                # Everything from the 9th token on is the tag list; glue it
                # back into a single field.
                fields[8:] = [' '.join(fields[8:])]
                # Drop the fractional part of the seconds.
                fields[2] = fields[2].split('.', 1)[0]
                if re.match(r'^[A-Za-z]+$', fields[0]) and fields[1].isdigit() and re.match(r'\d\d:\d\d:\d\d', fields[2]):
                    # The row carries no year, so the current local year is
                    # assumed.
                    # NOTE(review): rows written just before New Year but
                    # read after it would be stamped almost a year ahead.
                    t = int(mktime(strptime("%s-%s-%s %s" % (localtime().tm_year, fields[0], fields[1], fields[2]), "%Y-%b-%d %H:%M:%S")))
                    # The five leading tokens collapse into the timestamp,
                    # leaving [time, host, route, message id, tag list].
                    fields[0:5] = [t if t > 0 else time()]
                    valid = True
            except Exception as e:
                LOG_TAIL.error("Exception: %s" % str(e), True)
        else:
            LOG_TAIL.error("Strange line: %s" % record)
    except Exception as e:
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
        # Never account a record whose parsing blew up half-way: fields[0]
        # may still be a month name, which would break the arithmetic below.
        continue
    # fields[4] is read below, so the rewritten row must have all five fields.
    if not valid or len(fields) < 5:
        continue
    t = fields[0]
    # Start of the minute the record belongs to.
    t0 = int(t // 60) * 60
    # NOTE(review): if the route token is absent from a row, the positional
    # split shifts the fields and something else lands here - confirm that
    # the log format always contains it.
    route = fields[2].upper() if fields[2] else route.upper()
    for (svc, par_info) in FUNCS_RE.findall(fields[4]):
        # Empty payloads, "<NAME: ->" placeholders and VERS tags are skipped.
        # Because VERS is skipped here, the DVER/RVER branch that used to
        # follow could never run and has been removed.
        if not par_info or svc == 'VERS' or re.match(r"\s+\-", par_info):
            continue
        if svc == 'SO':
            for par in par_info.split(','):
                data_so[route][t0][par] += 1
            continue
        m = PARAM_INFO_RE.match(par_info)
        if not m:
            continue
        try:
            dt = float(m.group(2))
        except ValueError:
            # The pattern admits things like "1.2.3"; skip the tag instead
            # of letting the exception terminate the whole script.
            LOG_TAIL.error("Bad answer time '%s' in tag <%s:%s>" % (m.group(2), svc, par_info))
            continue
        if m.group(4) == 'S':
            svc += 'S'
        fn, par = m.group(1), m.group(3)
        stats = data[svc][route][fn][t0]
        stats[re.sub(r'[^\w]+', '', par)] += 1
        if m.group(5):
            for err_name in m.group(5).split(','):
                stats['Error%s' % re.sub(r'[^\w]+', '', err_name)] += 1
        stats['MinAnswerTime'] = min(stats['MinAnswerTime'], dt) if 'MinAnswerTime' in stats else dt
        stats['MaxAnswerTime'] = max(stats['MaxAnswerTime'], dt) if 'MaxAnswerTime' in stats else dt
        # Running sum; missing keys of this defaultdict(float) start at 0.0.
        stats['AvgAnswerTime'] += dt
        for limit, bucket in _ANSWER_TIME_BUCKETS:
            if dt < limit:
                stats[bucket] += 1
                break
        else:
            stats['200AnswerTime'] += 1
        stats['total'] += 1
    if time() - lastSendTime > int(q.get('sendTimePeriod', 60)):
        # Hand the filled accumulators over and continue with fresh ones;
        # initData() rebinds the globals, it does not clear these objects.
        data2, data_so2 = data, data_so
        initData()
        send_monstats(data2, data_so2)
        lastSendTime = time()

# The log tail ended: send whatever is left.
send_monstats(data, data_so)
