#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import sys, time
from collections import defaultdict
from socket import gethostname
from so_log_tail import LogTail

# Prefix prepended to every metric name written to stdout.
KEY_PREFIX = "so.cluster"
# Width, in seconds, of the time bucket that row timestamps are rounded down to.
PARSE_PERIOD = 60
# Maps the lower-cased one-letter verdict code found in a log row to the
# metric-name suffix used for it; rows with any other code are ignored.
SPAMTYPES = { "s": "spam", "h": "ham", "m": "malic", "d": "dlv" }
# First command-line argument: the route being processed ('in' when omitted).
# The row-parsing code treats 'out' specially.
ROUTE = sys.argv[1] if len(sys.argv) > 1 else 'in'
# Second command-line argument: path of the log to tail.  When omitted, the
# path is built from this machine's hostname and ROUTE.
LOGFILE = sys.argv[2] if len(sys.argv) > 2 else '/u0/{0}/logs/shortdlv_{1}.log'.format(gethostname(), ROUTE)

def parse_timestamp(s):
    """Convert a ``DD.MM.YYYY HH:MM:SS`` string to integer epoch seconds.

    The string is interpreted as local time (via ``time.mktime``).  Results
    are memoised per input string in the ``parse_timestamp.cache`` dict, so
    repeated timestamps are parsed only once.

    If *s* cannot be parsed, the error is reported on stderr and the current
    time is used instead; that fallback value is cached for *s* as well.

    Args:
        s: Timestamp text, e.g. ``"31.12.2020 23:59:59"``.

    Returns:
        Whole seconds since the epoch, always as an ``int``.
    """
    # The cache lives on the function object and is created on first use.
    if not hasattr(parse_timestamp, "cache"):
        parse_timestamp.cache = {}
    cache = parse_timestamp.cache
    if s in cache:
        return cache[s]
    try:
        value = int(time.mktime(time.strptime(s, "%d.%m.%Y %H:%M:%S")))
    except Exception as e:
        # Deliberately best-effort: a malformed timestamp must not abort the
        # run.  The fallback is truncated to int so both paths return the
        # same type.
        sys.stderr.write("Exception: %s\n" % str(e))
        value = int(time.time())
    cache[s] = value
    return value

# LogTail is a project helper; calling the instance yields the log lines to
# process.  NOTE(review): presumably `id` names the saved read position so
# each run only sees new lines -- confirm against so_log_tail.
LOG_TAIL = LogTail(None, log=LOGFILE, id='soplotnik_shortdlv%s' % ROUTE)

# stat[bucket_start][metric_name] -> number of rows counted in that bucket.
stat = defaultdict(lambda: defaultdict(float))
for line in LOG_TAIL():
    sf = line.split("\t")
    try:
        # Pick (verdict code, timestamp[, extra field]) by column position.
        # The layout depends on the route and, for 'out', on whether the
        # first column contains an '@'.  A row with too few columns raises
        # IndexError here and is reported by the handler below.
        if ROUTE != 'out':
            parts = [sf[2], sf[3]]
        elif '@' in sf[0]:
            parts = [sf[2], sf[5]]
        else:
            parts = [sf[3], sf[6], sf[8]]

        spamtype = parts[0].lower()
        if spamtype not in SPAMTYPES:
            continue
        spamname = SPAMTYPES[spamtype]

        # Round the row's timestamp down to the start of its bucket.
        seconds = parse_timestamp(parts[1].strip())
        timestamp = int(seconds // PARSE_PERIOD) * PARSE_PERIOD
        stat[timestamp]["%s.%s" % (ROUTE, spamname)] += 1

        # Outgoing spam rows whose extra field contains "_YW" are also
        # counted under the separate "out.captcha" metric.
        if ROUTE == 'out' and spamtype == "s" and len(parts) > 2 and "_YW" in parts[2]:
            stat[timestamp]["out.captcha"] += 1
    except Exception as e:
        # Best-effort: report the bad row and keep processing the rest.
        sys.stderr.write("Exception while parsing row '%s': %s\n" % (line, str(e)))

# Emit one "<prefix>.<metric> <count> <bucket_start>" line per counter,
# ordered by bucket and then by metric name.
out_lines = []
for timestamp, info in sorted(stat.items()):
    for name, count in sorted(info.items()):
        out_lines.append("%s.%s %f %d\n" % (KEY_PREFIX, name, count, timestamp))
buf = "".join(out_lines)
if buf:
    # The extra newline reproduces what the original `print buf` wrote, so
    # the bytes on stdout are unchanged.
    sys.stdout.write(buf)
    sys.stdout.write("\n")
    sys.stdout.flush()
