#!/usr/bin/env python2
#
import sys, re, time
sys.path.insert(0, '/opt/precast-sosearchlog-parser')
from collections import defaultdict
from subprocess import Popen, PIPE
from socket import gethostname
from common import get_traceback
from so_log_tail import LogTail, LOGS_FOLDER
from mongodb import loadMongoDbCredentials, getMongoDB

# Prefix prepended to every metric name printed by send2plotnik().
KEY_PREFIX = "so.search"
# Bucket width that parsed log timestamps (epoch seconds) are rounded down to.
PARSE_PERIOD = 60
# Minimum wall-clock interval, in seconds, between two flushes of the counters.
SEND_DATA_PERIOD = 60
# Value written to the "type" field of the journal documents in send2db().
TYPE_MSEARCH_SPAM = 0
# Log file handed to LogTail; the path embeds the local host name.
LOGFILE = "/u0/%s/usr/local/www/logs/SO_MSEARCHPROXY/sendanswerfull.log" % gethostname()
# File handed to LogTail as its `errorlog` argument.
ERRORLOG = "%s/parse_search_trace.log" % LOGS_FOLDER
# Settings passed to loadMongoDbCredentials() and getMongoDB().
# NOTE(review): no password is listed here - presumably loadMongoDbCredentials()
# fills it in; 'timeout' is presumably in milliseconds. Confirm both against
# the mongodb helper module.
MONGO = {
    'db':      'journal',
    'hosts':   'sas-fxovfnlkhpwojehr.db.yandex.net,vla-f8vat6i2dl9fz4vc.db.yandex.net',
    'port':    27018,
    'user':    'journal',
    'timeout': 60000
}

def parse_timestamp(line):
    """Return the leading timestamp of a log line as integer epoch seconds.

    The text between the first character of *line* and the first "]" is
    parsed with the format "%Y.%m.%d %H:%M:%S" and converted with
    time.mktime(), i.e. it is interpreted as local time.  Results are
    memoized in ``parse_timestamp.cache``; the memo is emptied once it
    holds more than 1000 entries.

    Returns None when the line contains no "]" or when the extracted text
    does not match the format (the error is then reported on stderr).
    """
    try:
        memo = parse_timestamp.cache
    except AttributeError:
        # First call: attach the memo to the function object itself.
        memo = parse_timestamp.cache = defaultdict(int)
    if len(memo) > 1000:
        memo.clear()

    close_idx = line.find("]")
    if close_idx < 0:
        return None

    # Skip the first character (expected to be the opening bracket).
    stamp = line[1:close_idx]
    try:
        if stamp not in memo:
            parsed = time.strptime(stamp, "%Y.%m.%d %H:%M:%S")
            memo[stamp] = int(time.mktime(parsed))
        return memo[stamp]
    except ValueError as e:
        sys.stderr.write("Exception: %s.%s" % (str(e), get_traceback()) + "\n")
        sys.stderr.flush()
        return None

def parse_date(date_str):
    """Convert a "YYYY.MM.DD" string to integer epoch seconds (local midnight).

    The conversion goes through time.strptime()/time.mktime() and is
    memoized per distinct string in ``parse_date.cache``.

    Raises ValueError when *date_str* does not match "%Y.%m.%d".
    """
    try:
        memo = parse_date.cache
    except AttributeError:
        # First call: attach the memo to the function object itself.
        memo = parse_date.cache = {}

    seconds = memo.get(date_str)
    if seconds is None:
        parsed = time.strptime(date_str, "%Y.%m.%d")
        seconds = memo[date_str] = int(time.mktime(parsed))
    return seconds

def send2db(stats, uids):
    """Write the collected spam counters to MongoDB (best effort).

    Every uid in *uids* is upserted into the "users" collection.  Then, for
    each ``(date, uid) -> count`` entry of *stats*, the "count" field of the
    matching "journal" document (type TYPE_MSEARCH_SPAM) is incremented by
    ``count``, creating the document if needed.  ``date`` is the
    "YYYY.MM.DD" string and is converted with parse_date().

    Nothing is raised to the caller for ordinary errors: a failure to get
    the database handle is logged with a traceback, and failures of
    individual records are counted and summarized on stderr so that lost
    updates are visible.  Returns None.
    """
    errors = 0
    last_error = None
    try:
        db = getMongoDB(MONGO)
        for uid in uids:
            try:
                db["users"].update_one({"uid": uid}, {"$set": {"uid": uid}}, upsert=True)
            except Exception as e:
                # Keep going: one bad record must not block the others.
                errors += 1
                last_error = str(e)
        for (date, uid), count in sorted(stats.items()):
            try:
                db["journal"].update_one(
                    {"uid": uid, "date": parse_date(date), "type": TYPE_MSEARCH_SPAM},
                    {"$inc": {"count": count}},
                    upsert=True)
            except Exception as e:
                errors += 1
                last_error = str(e)
    except Exception as e:
        sys.stderr.write("Exception while DB operations: %s.%s" % (str(e), get_traceback()) + "\n")
        sys.stderr.flush()
    if errors:
        # Previously these failures were counted but never reported.
        sys.stderr.write("Failed DB operations: %d, last error: %s\n" % (errors, last_error))
        sys.stderr.flush()

def send2plotnik(stats):
    """Print the per-period counters to stdout, one metric per line.

    *stats* maps a timestamp to a mapping of counter name -> value.  Each
    counter becomes a line "<KEY_PREFIX>.<name> <value> <timestamp>",
    ordered by timestamp and then by name.  The batch is followed by one
    empty line and stdout is flushed.
    """
    rows = [
        "%s.%s %d %d\n" % (KEY_PREFIX, metric, value, moment)
        for moment, counters in sorted(stats.items())
        for metric, value in sorted(counters.items())
    ]
    # The extra "\n" reproduces the newline that a print statement appends.
    sys.stdout.write("".join(rows) + "\n")
    sys.stdout.flush()

# Compiled once instead of on every log line.
# NOTE(review): this matches "found=" followed by any character other than
# "0", not only by a non-zero digit - confirm that this is what is wanted.
_FOUND_RE = re.compile("found=[^0]+")


def _account_line(line, stat1, stat2, uids):
    """Fold one log line into the counters.

    *stat1* maps a timestamp rounded down to PARSE_PERIOD to the "total"
    and "found" counters; *stat2* maps ``(date, uid)`` to the number of
    spam answers seen; *uids* collects every uid that was counted.
    Lines shorter than 3 characters or without a parsable timestamp are
    ignored.  May raise ValueError when the uid is not an integer.
    """
    if len(line) < 3:
        return
    timestamp = parse_timestamp(line)
    if not timestamp:
        return
    timestamp_round = timestamp // PARSE_PERIOD * PARSE_PERIOD
    if "step=1" in line:
        stat1[timestamp_round]["total"] += 1
    if _FOUND_RE.search(line) is not None:
        stat1[timestamp_round]["found"] += 1
    if "\"spam\":true" in line and "&so_offset=0" in line:
        pos = line.find("&uid=")
        if pos > -1:
            start = pos + 5
            end = line.find("&", start)
            if end < 0:
                # "&uid=" is the last parameter: take the rest of the line
                # instead of letting a -1 index drop the final character.
                end = len(line)
            uid = int(line[start:end])
            uids.add(uid)
            # Characters 1..10 of the line hold the date part of the stamp.
            stat2[(line[1:11], uid)] += 1


if __name__ == "__main__":
    stat1 = defaultdict(lambda: defaultdict(int))
    stat2 = defaultdict(int)
    uids = set()
    lastSendTime = time.time()
    loadMongoDbCredentials(MONGO)
    LOG_TAIL = LogTail(None, log=LOGFILE, id='sosearch_sendanswerfull', errorlog=ERRORLOG)
    for line in LOG_TAIL():
        try:
            _account_line(line, stat1, stat2, uids)
        except Exception as e:
            sys.stderr.write("Exception: %s.%s" % (str(e), get_traceback()) + "\n")
            sys.stderr.flush()
        # Checked for every line, including skipped ones, so that a run of
        # short or unparsable lines cannot postpone the periodic flush.
        if time.time() - lastSendTime > SEND_DATA_PERIOD:
            send2db(stat2, uids)
            send2plotnik(stat1)
            # Start the next period with fresh accumulators.
            stat1, stat2, uids = defaultdict(lambda: defaultdict(int)), defaultdict(int), set()
            lastSendTime = time.time()
    # The tail ended: flush whatever is still pending.
    send2db(stat2, uids)
    send2plotnik(stat1)
