#!/usr/bin/env python2.7
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys, re, pymongo, bson, psycopg2
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from psycopg2.extras import execute_batch
from datetime import datetime, timedelta
from time import time, strptime, mktime
from traceback import format_exception
from collections import defaultdict
from urllib import urlopen

__author__ = "Yaroslav Klimik <klimiky@yandex-team.ru>"
__version__ = "1.0"

# Number of attempts getHosts4Group() makes to fetch a host list before
# falling back to the statically configured hosts.
RETRY_COUNT = 3
# First directory searched for the credential files (.mongodb.<db>,
# .pgpass.<db>) and for .pgsql/root.crt; the script's own directory and
# $HOME are tried afterwards.
WORKING_DIR = '/opt/complaints'
# MongoDB source settings.
#   cluster - group name passed to getHosts4Group() to discover the hosts
#   db      - database name; also the suffix of the credentials file name
#   hosts   - comma-separated fallback hosts used when discovery yields nothing
#   port    - passed to pymongo.MongoClient
#   user    - overwritten (together with 'passwd') by loadMongoDbCredentials()
#   timeout - used for both connectTimeoutMS and socketTimeoutMS
MONGO = {
    'cluster': 'so_db',
    'db':      'solog',
    'hosts':   'db1j.so.yandex.net,db1m.so.yandex.net,db1h.so.yandex.net',
    'port':    27017,
    'user':    'solog',
    'timeout': 60000
}
# PostgreSQL target settings.  getPGCredentials() overwrites host, port and
# user and adds 'password' when it finds a matching line in the pgpass file;
# there is no 'password' key until then.
PG = {
    "host": "iva-6wt50iawqrjjh088.db.yandex.net,myt-javd626w1erx0nto.db.yandex.net,sas-l556v0oqlmdx9fyf.db.yandex.net",
    "port": 6432,
    "db":   "rules",
    "user": "solog"
}

def get_traceback():
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except:
            pass
    return tb

def doRequest(url, prompt = "doRequest"):
    """Fetch *url* and return the response body, or "" on any failure.

    Args:
        url: address to request with ``urlopen``.
        prompt: prefix used in the diagnostic messages printed on failure.

    Returns:
        The body read from the response when the HTTP status is 200;
        otherwise an empty string (a diagnostic is printed to stdout).
    """
    try:
        response = urlopen(url)
        try:
            status = response.getcode()
            if status == 200:
                return response.read()
            # Note: what follows "body:" is response.info(), i.e. the
            # response headers; the body itself is not read here.
            print('{0} response HTTP code: {1}, body: {2}'.format(prompt, status, response.info()))
        finally:
            # Always release the underlying socket, including on the
            # early-return and error paths.
            response.close()
    except Exception as e:
        print('%s HTTP request failed: %s.\n%s' % (prompt, str(e), get_traceback()))
    return ""

def getHosts4Group(conductor_group, default_hosts = None):
    """Resolve the host names belonging to a conductor group.

    The group is looked up through the ``groups2hosts`` HTTP API, retrying up
    to ``RETRY_COUNT`` times while the request yields no data.

    Args:
        conductor_group: group name; when empty or None no request is made.
        default_hosts: hosts to return when the lookup is skipped, fails or
            yields no host names.  Defaults to a fresh empty list on every
            call, so callers can never share (and mutate) a common default.

    Returns:
        list: host names from the API response (one per non-blank line,
        stripped), or ``default_hosts``.
    """
    if default_hosts is None:
        default_hosts = []
    if not conductor_group:
        return default_hosts
    url = "https://c.yandex-team.ru/api-cached/groups2hosts/%s" % conductor_group
    prompt = "Get DB cluster hosts for group %s" % conductor_group
    for _ in range(RETRY_COUNT):
        r = doRequest(url, prompt)
        if r:
            # Skip blank lines so that an empty host name never ends up in a
            # connection string.
            hosts = [h for h in (line.strip() for line in r.splitlines()) if h]
            return hosts if hosts else default_hosts
    return default_hosts

def mongo_conn_str(cfg):
    """Build the ``mongodb://`` connection URI described by *cfg*.

    The host list comes from getHosts4Group() for ``cfg['cluster']``, with
    the comma-separated ``cfg['hosts']`` as the fallback.  A
    ``user:passwd@`` prefix is added only when ``cfg['user']`` is set and
    non-empty; in that case ``cfg['passwd']`` must be present.
    """
    credentials = ''
    if cfg.get('user'):
        credentials = "%s:%s@" % (cfg['user'], cfg['passwd'])
    cluster = cfg['cluster']
    fallback_hosts = cfg['hosts'].split(',')
    host_list = ','.join(getHosts4Group(cluster, fallback_hosts))
    return "mongodb://%s%s/%s" % (credentials, host_list, cfg['db'])

def loadMongoDbCredentials(cfg):
    """Fill ``cfg['user']`` and ``cfg['passwd']`` from a ``.mongodb.<db>`` file.

    The file is looked for in WORKING_DIR, then in the directory of this
    script, then in ``$HOME`` (when set); the first existing one is used.
    The first line that splits on ``:`` into exactly two fields supplies the
    user name and the (stripped) password.

    When no file is found and *cfg* has a ``user`` key, an error is printed.
    Any exception is caught and printed; the function never raises.

    Args:
        cfg: configuration dict with at least a ``db`` key; updated in place.
    """
    try:
        filename = '.mongodb.%s' % cfg['db']
        search_dirs = [WORKING_DIR, os.path.dirname(os.path.abspath(__file__))]
        if 'HOME' in os.environ:
            search_dirs.append(os.environ['HOME'])
        path = None
        for directory in search_dirs:
            candidate = '%s/%s' % (directory, filename)
            if os.path.exists(candidate):
                path = candidate
                break
        if path is None:
            if 'user' in cfg:
                # Report the last directory tried, as the lookup falls
                # through to it when the earlier ones have no file.
                print("ERROR: Unable to locate file with DB credentials in dir '%s'!" % search_dirs[-1])
            return
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(path) as f:
            for line in f:
                sf = line.split(':')
                if len(sf) == 2:
                    cfg['user'], cfg['passwd'] = sf[0], sf[1].strip()
                    break
    except Exception as e:
        print("loadMongoDbCredentials exception: %s.%s" % (str(e), get_traceback()))

def getMongoDB(cfg):
    """Return the pymongo database handle for ``cfg['db']``, creating it once.

    The handle is cached as an attribute of this function named
    ``<db>_connection``, so later calls for the same database reuse it.
    ``cfg['timeout']`` (10000 when missing or falsy) is used for both the
    connect and the socket timeout.
    """
    cache_attr = "%s_connection" % cfg['db']
    if not hasattr(getMongoDB, cache_attr):
        timeout = cfg.get('timeout') or 10000
        client = pymongo.MongoClient(
            host=mongo_conn_str(cfg),
            port=cfg['port'],
            connectTimeoutMS=timeout,
            socketTimeoutMS=timeout
        )
        setattr(getMongoDB, cache_attr, client[cfg['db']])
    return getattr(getMongoDB, cache_attr)

def getPGCredentials(cfg):
    """Fill host, port, user and password of *cfg* from a ``.pgpass.<name>`` file.

    ``<name>`` is ``cfg['db']`` with a trailing ``db`` removed, if present.
    The file is looked for in WORKING_DIR, then in the directory of this
    script, then in ``$HOME`` (when set).  The first line that splits on
    ``:`` into five fields and whose third field equals ``cfg['db']`` is
    used as ``host:port:db:user:password``.

    A missing file or any other error is caught and printed; the function
    does not raise for them, and *cfg* is left unchanged in that case.

    Args:
        cfg: configuration dict with a ``db`` key; updated in place.
    """
    dbname = cfg['db'][:-2] if cfg['db'].endswith('db') else cfg['db']
    filename = '.pgpass.{0}'.format(dbname)
    try:
        directory = WORKING_DIR
        if not os.path.exists('{0}/{1}'.format(directory, filename)):
            directory = os.path.dirname(os.path.abspath(__file__))
            if not os.path.exists('{0}/{1}'.format(directory, filename)) and 'HOME' in os.environ:
                directory = os.environ['HOME']
        # ``with`` closes the file even when a line fails to parse
        # (e.g. a non-numeric port).
        with open('{0}/{1}'.format(directory, filename)) as f:
            for line in f:
                sf = line.split(':')
                if len(sf) == 5 and sf[2] == cfg['db']:
                    cfg['host'], cfg['port'], cfg['user'], cfg['password'] = sf[0], int(sf[1]), sf[3], sf[4].strip()
                    break
    except Exception as e:
        print("getPGCredentials exception: %s.%s" % (str(e), get_traceback()))

def getPGdb(cfg, mode = 'read-write'):
    """Return a psycopg2 connection for ``cfg['db']``, reusing an open one.

    The connection is cached as an attribute of this function named
    ``<db>_connection`` and is re-created when it is missing or reports
    itself as closed.  ``.pgsql/root.crt`` is looked for in WORKING_DIR, then
    in the directory of this script, then in ``$HOME`` (when set), and is
    used with ``sslmode='verify-full'``.

    Host selection:
      * when psycopg2 exposes ``__libpq_version__`` the whole host list is
        handed to libpq with ``target_session_attrs`` set to ``read-write``
        or ``any``;
      * otherwise, for a comma-separated host list, each host is probed with
        ``SELECT pg_is_in_recovery()``: ``read-write`` mode accepts a host
        that is not in recovery, any other mode accepts a host that is;
      * otherwise the single host is used as is.

    Args:
        cfg: dict with ``db``, ``user``, ``password``, ``host`` and ``port``.
        mode: ``'read-write'`` (default) or any other value for read access.

    Returns:
        An open psycopg2 connection.

    Raises:
        psycopg2.OperationalError: when the connection cannot be made, or
            when no probed host is suitable for *mode*.
    """
    cache_attr = "%s_connection" % cfg['db']
    cached = getattr(getPGdb, cache_attr, None)
    if cached is not None and not getattr(cached, 'closed', False):
        return cached

    cert_dir = WORKING_DIR
    if not os.path.exists('%s/.pgsql/root.crt' % cert_dir):
        cert_dir = os.path.dirname(os.path.abspath(__file__))
        if not os.path.exists('%s/.pgsql/root.crt' % cert_dir) and 'HOME' in os.environ:
            cert_dir = os.environ['HOME']
    root_cert = '%s/.pgsql/root.crt' % cert_dir

    # NOTE(review): this only checks that the attribute exists, not that the
    # libpq version actually understands target_session_attrs - confirm.
    if hasattr(psycopg2, '__libpq_version__'):
        conn = psycopg2.connect(dbname = cfg['db'], user = cfg['user'], password = cfg['password'], host = cfg['host'], port = cfg['port'], sslmode = 'verify-full', sslrootcert = root_cert, target_session_attrs = ('read-write' if mode == 'read-write' else 'any'))
    elif ',' in cfg['host']:
        conn = None
        for host in cfg['host'].split(','):
            candidate = None
            try:
                candidate = psycopg2.connect(database = cfg['db'], user = cfg['user'], password = cfg['password'], host = host, port = cfg['port'], sslmode = 'verify-full', sslrootcert = root_cert)
                probe = candidate.cursor()
                probe.execute("SELECT pg_is_in_recovery()")
                res = probe.fetchone()
                probe.close()
                if res and (mode == 'read-write' and not res[0] or mode != 'read-write' and res[0]):
                    conn = candidate
                    break
                # Wrong role for the requested mode: release it instead of
                # leaving the connection open.
                candidate.close()
            except Exception as e:
                print("getPGdb exception: %s.%s" % (str(e), get_traceback()))
                if candidate is not None:
                    try:
                        candidate.close()
                    except psycopg2.Error:
                        # The connection is already unusable; nothing more
                        # to release.
                        pass
        if conn is None:
            raise psycopg2.OperationalError("getPGdb: no host of '%s' is suitable for mode '%s'" % (cfg['host'], mode))
    else:
        conn = psycopg2.connect(database = cfg['db'], user = cfg['user'], password = cfg['password'], host = cfg['host'], port = cfg['port'], sslmode = 'verify-full', sslrootcert = root_cert)

    # Cache on every path so later calls reuse the same connection.
    setattr(getPGdb, cache_attr, conn)
    return conn

# Pull the credentials into the shared configuration dicts before connecting.
loadMongoDbCredentials(MONGO)
getPGCredentials(PG)

# Command line: [route [mindate [maxdate]]]; route defaults to 'in' and is
# capitalized to form the collection/table suffix.
route = (sys.argv[1] if len(sys.argv) > 1 else 'in').capitalize()
mindate = sys.argv[2] if len(sys.argv) > 2 else ''
maxdate = sys.argv[3] if len(sys.argv) > 3 else ''

# Range condition on the date; stays None when no bound was given.
match = None
if len(sys.argv) > 2:
    match = {'$gte': mindate}
if len(sys.argv) > 3:
    if mindate:
        match['$lt'] = maxdate
    else:
        # An empty lower bound is dropped in favour of the upper bound alone.
        match = {'$lt': maxdate}

# Counter fields copied for each rule; 'So*' routes carry a reduced set.
if route.startswith('So'):
    PARAMS = ('R1', 'R4', 'R256')
else:
    PARAMS = ('R1', 'R2', 'R4', 'R8', 'R127', 'R256', 'cmpl_spam', 'cmpl_spam_nopf', 'cmpl_ham', 'cmpl_ham_nopf')

# SQL fragments: column list, value placeholders and the "add to existing
# value" assignments used in the ON CONFLICT clauses.
pars = ', '.join(p.lower() for p in PARAMS)
placeholders = ', '.join(['%s'] * len(PARAMS))
detailed_increment = ', '.join(
    '{0} = detailed_rules_{1}.{0} + EXCLUDED.{0}'.format(p.lower(), route.lower()) for p in PARAMS
)
daily_increment = ', '.join(
    '{0} = rules_{1}.{0} + EXCLUDED.{0}'.format(p.lower(), route.lower()) for p in PARAMS
)

# Copy the daily and the detailed rule statistics from MongoDB into
# PostgreSQL, one date at a time.  Rows that already exist are incremented
# (ON CONFLICT ... DO UPDATE), and each batch is committed separately.
try:
    mongo = getMongoDB(MONGO)
    pg = getPGdb(PG)
    pg_cursor = pg.cursor()
    match = {'date': match} if match else {}
    # Undecodable bytes in stored strings are ignored rather than aborting.
    codec_options = bson.CodecOptions(unicode_decode_error_handler="ignore")
    daily_collection = mongo['Rules_%s' % route].with_options(codec_options=codec_options)
    detailed_collection = mongo['detailed_Rules_%s' % route].with_options(codec_options=codec_options)
    # The statements and the rule filter do not change between iterations,
    # so they are built once.
    daily_sql = """INSERT INTO rules_{0} (date, rule, {1}) VALUES(%s, %s, {2})
                ON CONFLICT (rule, date) DO UPDATE SET {3}""".format(route.lower(), pars, placeholders, daily_increment)
    detailed_sql = """INSERT INTO detailed_rules_{0} (time, rule, {1}) VALUES(TO_TIMESTAMP(%s), %s, {2})
                        ON CONFLICT (rule, time) DO UPDATE SET {3}""".format(route.lower(), pars, placeholders, detailed_increment)
    rule_pattern = re.compile(r'\w+')
    for d in daily_collection.distinct('date', match):
        print("Date '%s': " % d, end=''); sys.stdout.flush()
        data = []
        for record in daily_collection.find({'date': d}):
            try:
                # Missing counters are stored as 0.
                data.append([d, record['rule']] + [(int(record[p]) if p in record else 0) for p in PARAMS])
            except Exception as e:
                print("Retrieving of record failed: %s.%s" % (str(e), get_traceback()))
        execute_batch(pg_cursor, daily_sql, data)
        pg.commit()
        print("daily statistics done", end=''); sys.stdout.flush()
        # [local midnight of d, local midnight of the next day) expressed as
        # integer timestamps, matching the numeric 'time' field.
        day_start = datetime.strptime('{0} 00:00:00'.format(d), "%Y-%m-%d %H:%M:%S")
        match_detailed = {
            '$gte': int(mktime(day_start.timetuple())),
            '$lt':  int(mktime((day_start + timedelta(days=1)).timetuple()))
        }
        for ts in detailed_collection.distinct('time', {'time': match_detailed}):
            data = []
            for record in detailed_collection.find({'time': ts}):
                # Same per-record tolerance as the daily loop: one malformed
                # document is reported and skipped instead of aborting the run.
                try:
                    # Only rules starting with a word character are copied.
                    if not rule_pattern.match(record['rule']):
                        continue
                    data.append([ts, record['rule']] + [(int(record[p]) if p in record else 0) for p in PARAMS])
                except Exception as e:
                    print("Retrieving of record failed: %s.%s" % (str(e), get_traceback()))
            execute_batch(pg_cursor, detailed_sql, data)
            pg.commit()
        print(", detailed statistics done"); sys.stdout.flush()
    pg_cursor.close()
except Exception as e:
    print("Exception: %s.%s" % (str(e), get_traceback()), file=sys.stderr); sys.stderr.flush()
