#!/usr/bin/env python
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
import os, os.path, sys, time, fcntl
from collections import defaultdict
from datetime import datetime
from socket import gethostname
from so_log_tail import LogTail
from mongodb import loadMongoDbCredentials, getMongoDB

# Value stored in the "type" field of the documents upserted into the
# "journal" collection by this script.
TYPE_SO_OUT_SPAM = 1

# Lock/PID file used to make sure only one copy of the script runs at a time.
PIDFILE = "/var/run/parse_journal_shortlog_out.pid"

# Log file that is tailed; the path contains the local host name.
LOGFILE = "/u0/%s/logs/shortdlv_out.log" % gethostname()

# Connection settings handed to loadMongoDbCredentials() / getMongoDB().
# NOTE(review): 'timeout' is presumably in milliseconds - confirm against
# the mongodb helper module.
MONGO = dict(
    db='journal',
    hosts='sas-fxovfnlkhpwojehr.db.yandex.net,vla-f8vat6i2dl9fz4vc.db.yandex.net,man-28m87k85xm5ejgqn.db.yandex.net',
    port=27018,
    user='journal',
    timeout=60000,
)

def check_pid(filename):
    """Acquire the single-instance lock and record our PID in `filename`.

    The file is opened (created if missing) WITHOUT truncation, then an
    exclusive non-blocking lock is requested on it.  Only after the lock is
    obtained is the old content replaced by the current PID, so a second
    launch can no longer wipe the PID written by the running instance.

    The open file object is kept in ``check_pid.file`` so that it stays
    alive (and the lock stays held) until the process exits.

    Returns True when the lock was obtained, False when it could not be
    obtained (lockf raised IOError, e.g. another process holds it).
    """
    # O_CREAT without O_TRUNC: never destroy the content before we own the lock.
    fd = os.open(filename, os.O_RDWR | os.O_CREAT, 0o666)
    check_pid.file = os.fdopen(fd, "r+")
    try:
        fcntl.lockf(check_pid.file, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # Lock not obtained: do not keep the descriptor open.
        check_pid.file.close()
        return False
    # We own the lock now; drop any stale PID and write ours.
    check_pid.file.truncate(0)
    check_pid.file.write("%d\n" % os.getpid())
    # Flush so the PID is visible on disk while the process is running,
    # not only after it exits.
    check_pid.file.flush()
    return True

def parse_date(date_str):
    """Convert a "DD.MM.YYYY" string to an integer Unix timestamp.

    The conversion goes through time.strptime() and time.mktime(), i.e. the
    date is interpreted as local-time midnight.  Results are memoised in the
    ``parse_date.cache`` dict, keyed by the input string, so each distinct
    date is parsed only once.

    Raises ValueError (from time.strptime) if the string does not match the
    "%d.%m.%Y" format; nothing is cached in that case.
    """
    # The memo lives on the function object and is created on first use.
    try:
        memo = parse_date.cache
    except AttributeError:
        memo = parse_date.cache = {}

    if date_str in memo:
        return memo[date_str]

    parsed = time.strptime(date_str, "%d.%m.%Y")
    stamp = int(time.mktime(parsed))
    memo[date_str] = stamp
    return stamp

# Single-instance guard: if the PID-file lock cannot be taken, report it and
# stop without doing any work (exit status 0, as before).
if not check_pid(PIDFILE):
    print("%s %s" % (datetime.today().strftime("[%Y-%m-%d %H:%M:%S]"), "parse_journal_shortlog_out already running"))
    # sys.exit() instead of exit(): the latter is an interactive helper
    # installed by the site module and is not guaranteed to exist.
    sys.exit()

print("%s %s" % (datetime.today().strftime("[%Y-%m-%d %H:%M:%S]"), "parse_journal_shortlog_out starts..."))

# Read records from the log and aggregate them in memory:
#   stat   - number of matching records per (uid, date-string) pair
#   uids   - set of all uids seen
#   total  - number of records of type 'M' or 'S' (including ones whose
#            uid could not be parsed)
#   errors - number of malformed records
# NOTE(review): LogTail presumably yields only the records appended since
# the previous run - confirm against so_log_tail.
LOG_TAIL = LogTail(None, log=LOGFILE, id='journal')
stat, uids, total, errors = defaultdict(int), set(), 0, 0
for record in LOG_TAIL():
    sf = record.split()
    # A usable row has at least 17 whitespace-separated fields
    # (field 16 holds the uid).
    if len(sf) < 17:
        sys.stderr.write("Wrong row: %s\n" % record)
        errors += 1
        continue
    # Only rows whose field 3 is 'M' or 'S' are counted.
    if sf[3] not in ('M', 'S'):
        continue
    total += 1
    try:
        # '-' (or an empty value) means "no uid" and is stored as 0.
        uid = int(sf[16]) if sf[16] and sf[16] != '-' else 0
    except ValueError:
        # Non-numeric uid: report the row instead of dropping it silently.
        sys.stderr.write("Wrong uid in row: %s\n" % record)
        errors += 1
        continue
    # Field 6 is the date string later parsed with parse_date().
    date = sf[6]
    uids.add(uid)
    stat[(uid, date)] += 1
# Push the aggregated data to MongoDB.  Failures of individual upserts are
# only counted in `errors`; a failure to load credentials or connect aborts
# the whole DB phase and is reported on stderr.
try:
    loadMongoDbCredentials(MONGO)
    db = getMongoDB(MONGO)
    # Make sure every seen uid has a document in "users".
    for uid in uids:
        try:
            db["users"].update_one({"uid": uid}, {"$set": {"uid": uid}}, upsert=True)
        except Exception:
            # `except Exception` (not a bare except) so that Ctrl-C and
            # SystemExit still stop the script instead of being counted.
            errors += 1
    # Add this run's counts to the per-(uid, date, type) counters.
    for (uid, date), count in sorted(stat.items()):
        try:
            date = parse_date(date)
            db["journal"].update_one({"uid": uid, "date": date, "type": TYPE_SO_OUT_SPAM}, {"$inc": {"count": count}}, upsert=True)
        except Exception:
            errors += 1
except Exception as e:
    sys.stderr.write("Exception while DB operations: %s\n" % str(e))

print("%s %s" % (datetime.today().strftime("[%Y-%m-%d %H:%M:%S]"), "Parsing done. Total lines: %d. Errors: %d" % (total, errors)))
