#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys, pymongo, bson
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from traceback import format_exception
from collections import defaultdict
from urllib import urlopen
from datetime import datetime, timedelta
from time import time, strptime, mktime, strftime, localtime

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Number of attempts getHosts4Group() makes when asking for a group's host list.
RETRY_COUNT = 3
# First directory searched by loadMongoDbCredentials() for the '.mongodb.<db>' file.
WORKING_DIR = '/opt/complaints'
# Source cluster: the script reads the 'Rules_*' / 'detailed_Rules_*' collections from it.
#   cluster - group name resolved to a host list by getHosts4Group()
#   hosts   - comma-separated fallback used when that lookup yields nothing
#   port    - passed as `port` to pymongo.MongoClient
#   user    - replaced by the value from the credentials file when one is found
#   timeout - milliseconds, used for both connectTimeoutMS and socketTimeoutMS
MONGO_1 = {
    'cluster': 'so_db',
    'db':      'solog',
    'hosts':   'db1j.so.yandex.net,db1m.so.yandex.net,db1h.so.yandex.net',
    'port':    27017,
    'user':    'solog',
    'timeout': 60000
}
# Destination cluster: the script upserts the counters into it.
# It has no 'cluster' key, so the static 'hosts' list is always used.
MONGO_2 = {
    'db':      'rules',
    'hosts':   'sas-8bydc7gbz3h070ke.db.yandex.net,vla-hwmeehtmq450wvke.db.yandex.net,man-t5y0rvgx2wm1mer4.db.yandex.net',
    'port':    27018,
    'user':    'solog',
    'timeout': 70000
}
def get_traceback():
    """Return the exception currently being handled as tab-indented text.

    Each fragment produced by ``traceback.format_exception`` is stripped of
    surrounding whitespace, prefixed with a tab and terminated with a newline.

    Returns:
        str: the formatted traceback; fragments that cannot be appended are
        left out.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Best effort: a fragment that cannot be concatenated (presumably
            # mixed byte/unicode text under Python 2) is skipped on purpose.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            continue
    return tb

def doRequest(url, prompt = "doRequest"):
    """Fetch `url` and return the response body.

    Args:
        url: address handed to ``urlopen``.
        prompt: text prepended to every diagnostic message printed here.

    Returns:
        The response body when the HTTP status is 200, otherwise an empty
        string. Failures are printed to stdout and never raised to the caller.
    """
    try:
        f = urlopen(url)
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE: the message says "body", but f.info() is the header block.
            print('{0} response HTTP code: {1}, body: {2}'.format(prompt, f.getcode(), f.info()))
        finally:
            # Release the connection on every path; it used to be left open.
            f.close()
    except Exception as e:
        print('%s HTTP request failed: %s.\n%s' % (prompt, str(e), get_traceback()))
    return ""

def getHosts4Group(conductor_group, default_hosts = None):
    """Resolve a conductor group name to the list of its hosts.

    Args:
        conductor_group: group name; a falsy value skips the lookup entirely.
        default_hosts: list returned when no lookup is made, every attempt
            fails, or the response contains no host names. Defaults to a
            fresh empty list.

    Returns:
        list: stripped, non-empty lines of the first non-empty response, or
        `default_hosts`.
    """
    # A literal [] default would be one shared list handed out to every caller.
    if default_hosts is None:
        default_hosts = []
    if not conductor_group:
        return default_hosts
    url = "https://c.yandex-team.ru/api-cached/groups2hosts/%s" % conductor_group
    prompt = "Get DB cluster hosts for group %s" % conductor_group
    for _ in range(RETRY_COUNT):
        r = doRequest(url, prompt)
        if r:
            # Drop blank lines so they do not turn into empty host names.
            hosts = [line.strip() for line in r.splitlines() if line.strip()]
            return hosts if hosts else default_hosts
    return default_hosts

def mongo_conn_str(cfg):
    """Build the ``mongodb://`` URI for a cluster configuration dict.

    Reads 'user', 'passwd', 'cluster', 'hosts' and 'db' from `cfg`. The
    credentials part is emitted only when 'user' is present and non-empty.
    Hosts come from getHosts4Group(), with the comma-separated 'hosts' value
    as fallback.
    """
    # NOTE(review): user and password are inserted verbatim; confirm whether
    # the stored credentials are already URI-escaped.
    if cfg.get('user'):
        credentials = "%s:%s@" % (cfg['user'], cfg['passwd'])
    else:
        credentials = ''
    fallback_hosts = cfg['hosts'].split(',')
    host_list = getHosts4Group(cfg.get('cluster', ''), fallback_hosts)
    return "mongodb://%s%s/%s" % (credentials, ','.join(host_list), cfg['db'])

def loadMongoDbCredentials(cfg):
    """Fill cfg['user'] and cfg['passwd'] from a '.mongodb.<db>' file.

    The file is looked up in WORKING_DIR, then in the directory of this
    script, then in $HOME (when set). The first line that splits into exactly
    two ':'-separated fields supplies the user (as is) and the password
    (stripped).

    Args:
        cfg: cluster configuration dict; must contain 'db'. Updated in place.

    Returns:
        None. Problems are printed to stdout rather than raised.
    """
    CURDIR = WORKING_DIR
    try:
        file_name = '.mongodb.%s' % cfg['db']
        if not os.path.exists('%s/%s' % (CURDIR, file_name)):
            CURDIR = os.path.dirname(os.path.abspath(__file__))
            if not os.path.exists('%s/%s' % (CURDIR, file_name)) and 'HOME' in os.environ:
                CURDIR = os.environ['HOME']
        path = '%s/%s' % (CURDIR, file_name)
        if os.path.exists(path):
            # `with` closes the file even when reading or parsing raises.
            with open(path) as f:
                for line in f:
                    sf = line.split(':')
                    if len(sf) == 2:
                        cfg['user'], cfg['passwd'] = sf[0], sf[1].strip()
                        break
        elif 'user' in cfg:
            # Only the last directory tried is reported.
            print("ERROR: Unable to locate file with DB credentials in dir '%s'!" % CURDIR)
    except Exception as e:
        print("loadMongoDbCredentials exception: %s.%s" % (str(e), get_traceback()))

def getMongoDB(cfg):
    """Return the database handle for `cfg`, creating it on first use.

    The handle is stored as the attribute '<db>_connection' on this function,
    so later calls with the same cfg['db'] return the same object without
    reconnecting. cfg['timeout'] (10000 when missing or falsy) is used for
    both the connect and the socket timeout.
    """
    cache_attr = "%s_connection" % cfg['db']
    if hasattr(getMongoDB, cache_attr):
        return getattr(getMongoDB, cache_attr)

    timeout = cfg.get('timeout') or 10000
    client = pymongo.MongoClient(
        host=mongo_conn_str(cfg),
        port=cfg['port'],
        connectTimeoutMS=timeout,
        socketTimeoutMS=timeout
    )
    database = client[cfg['db']]
    setattr(getMongoDB, cache_attr, database)
    return database

# Load credentials for both clusters before any connection is opened.
for mongo_cfg in (MONGO_1, MONGO_2):
    loadMongoDbCredentials(mongo_cfg)

# argv[1]: route name, capitalised and used as the collection name suffix.
route = sys.argv[1].capitalize() if len(sys.argv) > 1 else 'In'
match = match_detailed = None

# argv[4]: any value other than '', 'false' or '0' skips the daily statistics
# of the first processed date.
skip_first_daily = int(len(sys.argv) > 4 and sys.argv[4] not in ('', 'false', '0'))

# argv[2]: lower bound, "YYYY-MM-DD" optionally followed by "HH:MM".
mindate = ''
if len(sys.argv) > 2:
    mindate = sys.argv[2].split()
    match = {'$gte': mindate[0]}
    t0 = int(mktime(strptime(mindate[0], "%Y-%m-%d")))
    if len(mindate) == 1:
        detailed_from = t0
    else:
        # A time of day was given: the first date is processed only partially,
        # so its daily statistics are skipped.
        skip_first_daily = 1
        detailed_from = int(mktime(strptime(sys.argv[2], "%Y-%m-%d %H:%M")))
    match_detailed = {'$gte': detailed_from, '$lt': t0 + 86400}

# argv[3]: exclusive upper bound for the date.
maxdate = ''
if len(sys.argv) > 3:
    maxdate = sys.argv[3].split()
    if not mindate:
        # NOTE(review): looks unreachable - argv[3] implies argv[2], and an
        # empty argv[2] already fails on mindate[0] above.
        match = {'$lt': maxdate[0]}
    else:
        match['$lt'] = maxdate[0]

# Counter fields copied for each rule; routes starting with 'So' use a shorter set.
PARAMS = (
    ('R1', 'R4', 'R256')
    if route.startswith('So') else
    ('R1', 'R2', 'R4', 'R8', 'R127', 'R256', 'cmpl_spam', 'cmpl_spam_nopf', 'cmpl_ham', 'cmpl_ham_nopf')
)

# Three-level counters; not referenced by the rest of the visible script.
detailed = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
daily = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
def _transfer_counters(source, target, key_field, key_value, failure_prompt):
    """Add the PARAMS counters of each matching source record to `target`.

    Every record of `source` with `key_field` equal to `key_value` is upserted
    into `target` under (key_field, 'rule'), incrementing each non-empty
    PARAMS field by its integer value. A failure on a single record is
    printed, prefixed with `failure_prompt`, and does not stop the loop.
    """
    for record in source.find({key_field: key_value}):
        try:
            data_row = {}
            for p in PARAMS:
                if p in record and record[p]:
                    data_row[p] = int(record[p])
            target.update_one(
                {key_field: key_value, 'rule': record['rule']},
                {'$inc': data_row},
                upsert=True
            )
        except Exception as e:
            print("%s: %s.%s" % (failure_prompt, str(e), get_traceback()))


# Copy daily and detailed rule statistics from MONGO_1 to MONGO_2.
# NOTE: values are applied with '$inc', so processing the same date twice
# adds the counters twice.
try:
    db1 = getMongoDB(MONGO_1)
    db2 = getMongoDB(MONGO_2)
    match = {'date': match} if match else {}
    # Undecodable bytes in stored strings are ignored instead of raising.
    lenient_codec = bson.CodecOptions(unicode_decode_error_handler="ignore")
    daily_collection = db1['Rules_%s' % route].with_options(codec_options=lenient_codec)
    detailed_collection = db1['detailed_Rules_%s' % route].with_options(codec_options=lenient_codec)
    daily_target = db2["Rules_%s" % route]
    detailed_target = db2["detailed_Rules_%s" % route]
    for d in sorted(daily_collection.distinct('date', match)):
        print("Date '%s': " % d, end='')
        sys.stdout.flush()
        if not skip_first_daily:
            _transfer_counters(daily_collection, daily_target, 'date', d, "Retrieving of record failed")
            print("daily statistics done")
            sys.stdout.flush()
            # Detailed records of this date: local midnight up to the next one.
            day_start = datetime.strptime('{0} 00:00:00'.format(d), "%Y-%m-%d %H:%M:%S")
            match_detailed = {
                '$gte': int(mktime(day_start.timetuple())),
                '$lt':  int(mktime((day_start + timedelta(days=1)).timetuple()))
            }
        else:
            # Applies to the first date only; match_detailed keeps the window
            # computed from the command line.
            skip_first_daily = 0
            print("daily statistics skipped")
            sys.stdout.flush()
        for ts in sorted(detailed_collection.distinct('time', {'time': match_detailed})):
            _transfer_counters(detailed_collection, detailed_target, 'time', ts, "Saving of record failed")
            print("Datetime '%s': done" % strftime("%Y-%m-%d %H:%M", localtime(ts)))
            sys.stdout.flush()
        print("Date '%s': detailed statistics done" % d)
        sys.stdout.flush()
except Exception as e:
    print("Exception '%s' while retrieving record.%s" % (str(e), get_traceback()), file=sys.stderr)
    sys.stderr.flush()
