#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, json, re, ConfigParser
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from time import time, localtime, strptime, strftime, mktime
from subprocess import Popen, PIPE
from collections import defaultdict
from socket import getfqdn
from common import getRoute, getGroup4Host
from so_log_tail import LogTail

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Module-level state shared by the helper functions and the main loop below.
# (A `global` statement at module scope has no effect; it only lists the names.)
global cfg, q, check_types, route, svc, stats, cluster, LOG_TAIL

# X-Spam-Flag value -> counter name.
check_types = dict(YES='spam', NO='ham', DLVR='dlvr', SKIP='skip')

cfg = ConfigParser.SafeConfigParser()
cfg.optionxform = str  # keep option names case-sensitive

# Route: whatever getRoute() reports, else the second CLI argument, else 'in'.
route = getRoute() or (sys.argv[2] if len(sys.argv) > 2 else 'in')
prefx = '' if route in ('in', 'out') else route.upper()

# Host group with the "mail_so_" prefix and the "daemon" suffix stripped.
group = re.sub(r'^mail_?so_?(.*?)(?:daemon)?$', r'\1', getGroup4Host(getfqdn()))
if not group:
    # No usable group name: fall back to the route.
    group = route
svc = "so_%s" % group
route = group.upper()

# Config file: first CLI argument, else the default path.
if len(sys.argv) > 1:
    cfg_file = sys.argv[1]
else:
    cfg_file = 'WORKING_DIR/so-maillog-parser.ini'
cfg.read(cfg_file)

# Payload skeleton: the [general] section plus fixed bookkeeping fields.
q = dict(cfg.items('general'))
q.update({
    'dataResult': {},
    'log': 'maillog',
    'logData': {},
    'machine': getfqdn(),
})

def initData():
    """Rebind the global `stats` and `cluster` to fresh, empty counter trees.

    Each tree is a three-level defaultdict whose leaves are int counters
    starting at 0, so `tree[a][b][c] += 1` works without any setup.
    """
    global stats, cluster

    def _new_tree():
        return defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    stats, cluster = _new_tree(), _new_tree()

def send_monstats(stats):
    """Append `stats` to q['dataResult'] and pipe `q` as JSON to the sender script.

    `stats` maps a service name to {timestamp: {counter: value}}. For every
    timestamp bucket the sample counts for the summed timings are added
    ('_n_AvgTime' from 'OK', '_n_AvgCheckTime' from 'disp'), the timestamp is
    stored under 'time', and a plain-dict copy of the bucket is appended to
    q['dataResult'][service]. The buckets in `stats` are modified in place.

    Anything the sender writes to stderr, and any exception raised while
    running it, is reported through LOG_TAIL.error.
    """
    global LOG_TAIL
    results = q['dataResult']
    for service, buckets in stats.items():
        rows = results.setdefault(service, [])
        for moment, counters in buckets.items():
            for avg_key, count_key in (('AvgTime', 'OK'), ('AvgCheckTime', 'disp')):
                if avg_key in counters:
                    counters['_n_' + avg_key] = counters[count_key]
            counters['time'] = moment
            rows.append(dict(counters))
    try:
        sender = Popen(['WORKING_DIR/so_logdata_sender.py'], stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
        output, err = sender.communicate(input=json.dumps(q))
        if err:
            LOG_TAIL.error("Error while sending parsed data: %s. Output: %s" % (err, output))
    except Exception as e:
        LOG_TAIL.error("Exception while sending parsed data: %s" % str(e), True)

def send_plotnik(stats):
    """Print the counters in `stats` to stdout as plain-text metric lines.

    `stats` maps a source name to {timestamp: {counter: value}}. Entries with
    an empty source name are skipped. Each counter produces two lines,
    "<prefix>.<source>.<counter> <value> <timestamp>" and
    "<prefix>.<counter> <value> <timestamp>", where <prefix> is
    q['graphitePrefix'].
    """
    line_pair = "{0}.{1}.{2} {3} {4}\n{0}.{2} {3} {4}\n"
    for source, buckets in stats.items():
        if not source:
            continue
        for moment, counters in buckets.items():
            for metric, value in counters.items():
                print(line_pair.format(q['graphitePrefix'], source, metric, value, moment))

def _track_min(bucket, key, value):
    """Store `value` under `key` if it is the smallest sample seen in `bucket`.

    The buckets are defaultdict(int), so comparing against the implicit
    default of 0 would pin every minimum to 0; the first sample therefore has
    to be stored unconditionally.
    """
    if key not in bucket or value < bucket[key]:
        bucket[key] = value


# Main loop: follow the log, validate every record, classify its message and
# bump the matching counters; flush the collected counters periodically.
initData()
tail, lastSendTime = '', time()
LOG_TAIL = LogTail(cfg_file)
for record in LOG_TAIL():
    # string example
    # Mar  8 00:00:03 so1h spamstop[2605]: <NvT6fKkpVy-03vKBSDI> FRM: suid appended 'pashkova-mascha2015@yandex.ru id=852776206 country=ru karma=0 karma_status=0 uid=310395404 borndate=1427022912' vs 'pashkova-mascha2015@yandex.ru '
    valid, fields = False, []
    try:
        if re.match(r'^\W', record) and not re.search(r'\s+', record):
            LOG_TAIL.error('Unknown row format for row: %s' % record)
            continue
        fields = record.split()
        if len(fields) > 5:
            try:
                # Everything after the fifth token is the message text.
                fields[5:] = [' '.join(fields[5:])]
                # Cut off fractional seconds in the time token, if present.
                pp = fields[2].find('.')
                if pp > -1:
                    fields[2] = fields[2][:pp]
                if re.match(r'^[A-Za-z]+$', fields[0]) and fields[1].isdigit() and re.match(r'\d\d:\d\d:\d\d', fields[2]):
                    # The log line carries no year, so the current one is used.
                    t = int(mktime(strptime("%s-%s-%s %s" % (localtime().tm_year, fields[0], fields[1], fields[2]), "%Y-%b-%d %H:%M:%S")))
                    if tail:
                        # Text buffered from preceding unparseable lines is
                        # attached to this record's message.
                        fields[-1] += tail
                        tail = ''
                    # Month, day, time and host collapse into the timestamp:
                    # fields becomes [timestamp, process, message].
                    fields[0:4] = [t]
                    valid = True
            except Exception as e:
                LOG_TAIL.error("Exception: %s" % str(e), True)
        else:
            LOG_TAIL.error("Strange line: %s" % record)
        if not valid:
            tail += ' ' + record.strip()
            continue
    except Exception as e:
        LOG_TAIL.error("Exception '%s' while checking log record: %s" % (str(e), record), True)
        # The record never passed validation; do not count it.
        continue
    if len(fields) < 3:
        continue
    # Records stamped later than the start of the current minute (or with no
    # timestamp) are attributed to the start of the current minute.
    t0 = int(time() / 60) * 60
    t = t0 if not fields[0] or fields[0] > t0 else fields[0]
    r = 'BB_%s' % route.lower()
    m = re.search(r'BB took ([\d\.]+)', fields[2])
    if m:
        # Blackbox request timing (the captured value is scaled by 1000).
        stats[r][t]['total'] += 1
        stats[r][t]['OK'] += 1
        f = float(m.group(1)) * 1000.0
        _track_min(stats[r][t], 'MinTime', f)
        stats[r][t]['MaxTime'] = max(stats[r][t]['MaxTime'], f)
        stats[r][t]['AvgTime'] += f  # running sum; the sample count is 'OK'
        if f < 10:
            stats[r][t]['0Time'] += 1
        elif f < 20:
            stats[r][t]['10Time'] += 1
        elif f < 50:
            stats[r][t]['20Time'] += 1
        elif f < 100:
            stats[r][t]['50Time'] += 1
        elif f < 200:
            stats[r][t]['100Time'] += 1
        else:
            stats[r][t]['200Time'] += 1
    elif fields[2].find('Blackbox httpget error') > -1:
        stats[r][t]['total'] += 1
        stats[r][t]['errorHttpGet'] += 1
    elif fields[2].find('Cannot connect blackbox') > -1:
        stats[r][t]['total'] += 1
        stats[r][t]['errorConnect'] += 1
    else:
        m = re.match(r'^<[\w+\-]+>\s+launch took [\d\.]+ sec, thread took\s+([\d\.]+)', fields[2])
        if m:
            # Per-message check timing (the captured value is scaled by 1000).
            stats[svc][t]['disp'] += 1
            f = float(m.group(1)) * 1000.0
            _track_min(stats[svc][t], 'MinCheckTime', f)
            stats[svc][t]['MaxCheckTime'] = max(stats[svc][t]['MaxCheckTime'], f)
            stats[svc][t]['AvgCheckTime'] += f  # running sum; the sample count is 'disp'
            if f < 100:
                stats[svc][t]['0CheckTime'] += 1
            elif f < 500:
                stats[svc][t]['100CheckTime'] += 1
            elif f < 1000:
                stats[svc][t]['500CheckTime'] += 1
            elif f < 2000:
                stats[svc][t]['1000CheckTime'] += 1
            else:
                stats[svc][t]['2000CheckTime'] += 1
        else:
            m = re.search(r'\budns_([a-z]+) error', fields[2])
            if m:
                stats[svc][t][m.group(1).upper()] += 1
            elif re.search(r'\budns\s+\w+\s+error', fields[2]) and fields[2].find('georbl') > -1:
                stats[svc][t]['GEORBL'] += 1
            elif fields[2].find('organizations') > -1:
                stats[svc][t]['ORGS'] += 1
            elif fields[2].find('All children busy') > -1 or fields[2].find('All threads busy') > -1:
                stats[svc][t]['errorMaxed'] += 1
            elif re.search(r'\bpopper\b', fields[2]):
                stats[svc][t]['popper'] += 1
            elif re.search(r'\bFast accept\b', fields[2]):
                stats[svc][t]['fast_accept'] += 1
                cluster[route][t]['fast_accept'] += 1
            elif re.search(r'\bFast spam\b', fields[2]):
                stats[svc][t]['fast_spam'] += 1
                cluster[route][t]['fast_spam'] += 1
            elif re.search(r'\bFast reject\b', fields[2]) or re.search(r'\bFast malic\b', fields[2]):
                stats[svc][t]['fast_reject'] += 1
                cluster[route][t]['fast_reject'] += 1
            else:
                m = re.match(r'^SOCACHE\s+([^\n\r]+)$', fields[2])
                if m and m.group(1):
                    s = m.group(1)
                    p = 1 if s.startswith('put ') else 0
                    if s.find(' error ') > -1:
                        if p:
                            stats[svc][t]['errorCachePut'] += 1
                        elif s.startswith('get '):
                            stats[svc][t]['errorCacheGet'] += 1
                    else:
                        if p:
                            stats[svc][t]['CACHEPUT'] += 1
                        elif s.startswith('hit '):
                            stats[svc][t]['CACHEHIT'] += 1
                        elif s.startswith('collision '):
                            stats[svc][t]['errorCacheCollision'] += 1
                # The checks below run whether or not the SOCACHE pattern matched.
                if re.match(r'^Socket\s+.*?\s*error', fields[2]):
                    stats[svc][t]['errorSocket'] += 1
                elif fields[2].startswith('GetPreffered ip error'):
                    stats[svc][t]['errorResolveHost'] += 1
                elif fields[2].find('Parent process died') > -1:
                    stats[svc][t]['errorParentDied'] += 1
                elif fields[2].find('Restarting solver') > -1:
                    stats[svc][t]['errorSolverRestart'] += 1
                else:
                    m = re.search(r'X-Spam-Flag:\s*([A-Z]+)', fields[2])
                    if m:
                        if m.group(1) and m.group(1).upper() in check_types:
                            tp = check_types[m.group(1).upper()]
                            # A verdict with exactly zero hits gets its own
                            # counter: the verdict name with '0' appended.
                            m = re.search(r'\bhits:\s*([-\.0-9]+)\b', fields[2])
                            if m and float(m.group(1)) == 0.0:
                                tp += '0'
                            stats[svc][t][tp] += 1
                        else:
                            # fields[0] is an epoch integer; strftime needs a
                            # struct_time, hence the localtime() conversion.
                            LOG_TAIL.error('Unknown X-Spam-Flag value for row with data: %s %s' % (strftime("%b %d %H:%M:%S", localtime(fields[0])), ' '.join(fields[1:])))
                    else:
                        m = re.match(r'^(?:<[\w\-]+>\s)?(FBL|CMPL|ABOOK|KOOBA|YADISK)(?:\s|\:)', fields[2])
                        if m:
                            stats[svc][t][m.group(1)] += 1
    # Periodic flush: hand the collected counters over and start fresh ones.
    if time() - lastSendTime > int(q.get('sendTimePeriod', 60)):
        stats2 = stats.copy()
        cluster2 = cluster.copy()
        initData()
        #send_monstats(stats2)
        send_plotnik(cluster2)
        lastSendTime = time()
# Final flush once the log iterator is exhausted.
#send_monstats(stats)
send_plotnik(cluster)
