#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import re, psycopg2, pymongo
from urllib import urlopen
from time import strftime
from collections import defaultdict
from traceback import format_exception

# Base directory taken from $HOME; used below for the log file path and for the
# PostgreSQL root certificate path ("<WORKING_DIR>/.pgsql/root.crt").
WORKING_DIR = os.environ['HOME']
# File that writelog() appends to.
LOGFILE = "%s/logs/conv_compl_stat_db.log" % WORKING_DIR
# Number of attempts cluster_hosts() makes to fetch a host list.
RETRY_COUNT = 3
# Number of MongoDB documents read (and inserted into PostgreSQL) per loop iteration.
CHUNK_SIZE = 1000
# MongoDB source settings:
#   'cluster' - group name handed to cluster_hosts() to resolve the host list
#   'port'    - passed to pymongo.MongoClient
#   'hosts'   - NOTE(review): not read by any code visible in this file
#   'db'      - database name (also used as the connection-cache key)
MONGO = {
    'cluster': 'mail_so_813',
    'port':    27017,
    'hosts':   'compldb1m.so.yandex.net,compldb1j.so.yandex.net,compldb1o.so.yandex.net',
    'db':      'cmpl'
}
# PostgreSQL target settings. "host" is a comma-separated list. getPGCredentials()
# adds "password" and overwrites "host"/"port"/"user" when it finds a matching
# .pgpass entry.
PG = {
    "host":    "iva-xb3w4z7jjjpuf8sd.db.yandex.net,myt-3mdnt0clvntgeqmv.db.yandex.net,sas-5nnjcnus304urap7.db.yandex.net",
    "port":    6432,
    "db":      "complaints",
    "user":    "cmpl"
}
def get_traceback():
    """Return the exception currently being handled as an indented text block.

    Every chunk produced by traceback.format_exception() is stripped of
    surrounding whitespace, prefixed with a tab and terminated with a newline.

    Returns:
        The concatenated chunks as a single string.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except UnicodeError:
            # Presumably guards against Python 2 str/unicode mixing failures:
            # drop only the offending chunk instead of losing the whole traceback.
            # Narrowed from a bare except so KeyboardInterrupt and real bugs
            # are no longer swallowed.
            continue
    return tb

def writelog(msg, isTB = False):
    """Append a timestamped message to LOGFILE.

    Args:
        msg: text to log; nothing is written when it is empty or None.
        isTB: when true, the output of get_traceback() is appended to the
            message instead of a plain newline.

    Any failure while logging is reported on stderr and never raised.
    """
    if not msg: return
    try:
        tb = get_traceback() if isTB else "\n"
        # "with" guarantees the handle is closed even if write() fails
        with open(LOGFILE, 'a') as f:
            f.write(strftime("[%Y-%m-%d %H:%M:%S]: ") + msg + tb)
    except Exception as e:
        print("Writelog error: %s" % str(e), file = sys.stderr)

def output(message, message_type = "INFO"):
    """Print one "[timestamp] TYPE message" line to stdout and flush it immediately."""
    timestamp = strftime("%Y-%m-%d %H:%M:%S")
    line = "[%s] %s %s" % (timestamp, message_type, message)
    print(line)
    sys.stdout.flush()

def doRequest(url, prompt):
    """Fetch url and return the response body, or "" on any failure.

    Args:
        url: address to open with urlopen().
        prompt: human-readable prefix used in log messages.

    Returns:
        The body when the response code is 200; otherwise "" (the problem is
        written to the log via writelog()).
    """
    try:
        f = urlopen(url)
        try:
            if f.getcode() == 200:
                return f.read()
            # NOTE: f.info() is the response header block, although the log
            # message labels it "body".
            writelog('{0} response HTTP code: {1}, body: {2}'.format(prompt, f.getcode(), f.info()))
        finally:
            # always release the connection, including on the early return
            f.close()
    except Exception as e:
        writelog('%s HTTP request failed: %s.' % (prompt, str(e)), True)
    return ""

def cluster_hosts(conductor_group, default_hosts = None):
    """Resolve the host names of a conductor group.

    Args:
        conductor_group: group name to look up; a falsy value skips the lookup.
        default_hosts: list returned when the lookup is skipped, fails
            RETRY_COUNT times, or yields no host names. Defaults to a fresh
            empty list.

    Returns:
        A list of host names, one per non-blank response line, or default_hosts.
    """
    if default_hosts is None:
        # a fresh list per call: a shared mutable default would leak caller
        # modifications into later calls
        default_hosts = []
    if not conductor_group:
        return default_hosts
    url = "https://c.yandex-team.ru/api-cached/groups2hosts/%s" % conductor_group
    prompt = "Get DB cluster hosts for group %s" % conductor_group
    for _ in range(RETRY_COUNT):
        r = doRequest(url, prompt)
        if not r:
            continue
        # skip blank lines so no empty host name ends up in a connection string
        hosts = [line.strip() for line in r.splitlines() if line.strip()]
        return hosts if hosts else default_hosts
    return default_hosts

def mongo_conn_str(cfg):
    """Build a "mongodb://[user:passwd@]host1,host2,.../db" connection string from cfg.

    Hosts come from cluster_hosts(cfg['cluster'], ...) with a hard-coded
    fallback list. Credentials are included only when cfg has a 'user' key.
    NOTE(review): cfg['hosts'] is not consulted here - confirm that is intended.
    """
    if 'user' in cfg:
        credentials = "{0}:{1}@".format(cfg['user'], cfg['passwd'])
    else:
        credentials = ""
    fallback_hosts = ["db1%s.so.yandex.net" % dc for dc in "hmj"]
    host_list = ','.join(cluster_hosts(cfg['cluster'], fallback_hosts))
    return "mongodb://%s%s/%s" % (credentials, host_list, cfg['db'])

def getPGCredentials(cfg):
    """Fill cfg with connection credentials read from a ".pgpass.<name>" file.

    <name> is cfg['db'] with a trailing "db" removed, if present. The file is
    looked up in /home/klimiky/test first and, when absent there, next to this
    script. The first line with exactly five colon-separated fields whose third
    field equals cfg['db'] sets cfg['host'], cfg['port'] (as int), cfg['user']
    and cfg['password'].

    Args:
        cfg: settings dict with at least a 'db' key; updated in place.

    Errors while reading or parsing the file are logged via writelog() and not
    raised; cfg is then left as it was at the point of failure.
    """
    dbname = cfg['db'][:-2] if cfg['db'].endswith('db') else cfg['db']
    CURDIR = '/home/klimiky/test'
    try:
        if not os.path.exists('{0}/.pgpass.{1}'.format(CURDIR, dbname)):
            CURDIR = os.path.dirname(os.path.abspath(__file__))
        # "with" closes the file even when a line fails to parse
        with open('{0}/.pgpass.{1}'.format(CURDIR, dbname)) as f:
            for line in f:
                sf = line.split(':')
                if len(sf) == 5 and sf[2] == cfg['db']:
                    cfg['host'], cfg['port'], cfg['user'], cfg['password'] = sf[0], int(sf[1]), sf[3], sf[4].strip()
                    break
    except Exception as e:
        writelog("getPGCredentials exception: %s" % str(e), True)

def getPGdb(cfg):
    """Return a cached psycopg2 connection for cfg['db'], (re)connecting when needed.

    The connection is stored as an attribute of this function, keyed by the
    database name, and is re-created when it is missing or reports closed.

    Connection strategy:
      * psycopg2 exposes __libpq_version__: cfg['host'] is passed through as is.
      * otherwise, with a comma-separated cfg['host']: hosts are tried in order
        and the first one where pg_is_in_recovery() is false is kept.
      * otherwise: a single plain connection to cfg['host'].

    Args:
        cfg: dict with 'db', 'user', 'password', 'host' and 'port'.

    Returns:
        The psycopg2 connection.

    Raises:
        AttributeError: no connection has ever been established (e.g. no host
            in the list was usable).
        psycopg2.Error: connection failure in the single-connection branches.
    """
    attr = "%s_connection" % cfg['db']
    cached = getattr(getPGdb, attr, None)
    if cached is not None and not getattr(cached, 'closed', False):
        return cached
    common = dict(user = cfg['user'], password = cfg['password'], port = cfg['port'],
                  sslmode = 'verify-full', sslrootcert = '%s/.pgsql/root.crt' % WORKING_DIR)
    if hasattr(psycopg2, '__libpq_version__'):
        setattr(getPGdb, attr, psycopg2.connect(dbname = cfg['db'], host = cfg['host'], **common))
    elif ',' in cfg['host']:
        for host in cfg['host'].split(','):
            conn = None
            try:
                conn = psycopg2.connect(database = cfg['db'], host = host, **common)
                pg_cursor = conn.cursor()
                try:
                    pg_cursor.execute("SELECT pg_is_in_recovery()")
                    res = pg_cursor.fetchone()
                finally:
                    pg_cursor.close()
                if res and not res[0]:
                    setattr(getPGdb, attr, conn)
                    break
                # host is in recovery (or gave no answer): release it before trying the next one
                conn.close()
            except Exception as e:
                # log first, while the failing exception is still the current one
                writelog("getPGdb exception: %s" % str(e), True)
                output("getPGdb exception: %s" % str(e))
                if conn is not None:
                    conn.close()
                continue
        else:
            # loop finished without break: nothing usable was found
            writelog("getPGdb: no host out of recovery found among: %s" % cfg['host'])
    else:
        setattr(getPGdb, attr, psycopg2.connect(database = cfg['db'], host = cfg['host'], **common))
    return getattr(getPGdb, attr)

def getMongoDB(cfg):
    """Return the pymongo database object for cfg['db'], creating it on first use.

    The object is cached as an attribute of this function, keyed by the
    database name, so later calls reuse the same client.
    """
    attr = "%s_connection" % cfg['db']
    if hasattr(getMongoDB, attr):
        return getattr(getMongoDB, attr)
    client = pymongo.MongoClient(
        host = mongo_conn_str(cfg),
        port = cfg['port'],
        connectTimeoutMS = 10000,
        socketTimeoutMS = 10000,
    )
    setattr(getMongoDB, attr, client[cfg['db']])
    return getattr(getMongoDB, attr)

# ---------------------------------------------------------------------------
# Script body: copy the MongoDB collection 'compl_stat' into the PostgreSQL
# table 'compl_stat', CHUNK_SIZE documents at a time.
# ---------------------------------------------------------------------------
INSERT_SQL = """INSERT INTO compl_stat (date, source, footype, host, seen, total, empty_ip, with_mail, min_time, max_time, accum_time,
                http_200, http_4xx, http_499, http_5xx, http_other) VALUES """

def _int_field(record, key):
    """Return record[key] as int, or 0 when the key is missing or its value is falsy."""
    return int(record.get(key) or 0)

def _row_from_record(r):
    """Build the 16-value row (in INSERT_SQL column order) from one Mongo document.

    http_4xx and http_5xx are always written as 0.
    """
    return (
        r['date'], r['source'], r['footype'], r['host'], r.get("seen") or "", int(r['total']),
        _int_field(r, "empty_ip"), _int_field(r, "with_mail"),
        _int_field(r, "min_time"), _int_field(r, "max_time"), _int_field(r, "accum_time"),
        _int_field(r, "http_200"), 0, _int_field(r, "http_499"), 0, _int_field(r, "http_other"),
    )

def _merge_rows(a, b):
    """Combine two rows with the same key: sum the counters, keep min/max of the time bounds."""
    return (
        a[0], a[1], a[2], a[3], a[4],
        a[5] + b[5], a[6] + b[6], a[7] + b[7],
        min(a[8], b[8]), max(a[9], b[9]), a[10] + b[10],
        a[11] + b[11], 0, a[13] + b[13], 0, a[15] + b[15],
    )

getPGCredentials(PG)
mongo = getMongoDB(MONGO)
pg = getPGdb(PG)
offset, failures, failed = 0, 0, False
output("Converting table 'compl_stat': STARTED")
while True:
    # --- read and de-duplicate one chunk ---------------------------------
    try:
        # The duplicate index is rebuilt for every chunk and stores positions
        # in `data`, so it can never point at a row of another chunk.
        # Duplicates that span two chunks are therefore inserted as separate rows.
        data, sources = [], {}
        for r in mongo['compl_stat'].find(skip=offset, limit=CHUNK_SIZE):
            tup = _row_from_record(r)
            key = (r['date'], r['host'], r['source'], r['footype'], tup[4])
            k = sources.get(key)
            if k is None:
                sources[key] = len(data)
                data.append(tup)
            else:
                output("Double: 1. %s\n2. %s" % (str(data[k]), str(tup)))
                data[k] = _merge_rows(data[k], tup)
    except Exception as e:
        output("DB exception: %s.%s" % (str(e), get_traceback()), 'ERR ')
        failures += 1
        if failures >= RETRY_COUNT:
            # give up instead of silently skipping chunks / looping forever
            failed = True
            break
        continue  # nothing was written yet, so retrying the same offset is safe
    failures = 0
    if not data:
        break
    output("Data count in chunk: %s" % len(data))
    # --- insert the chunk in one statement -------------------------------
    pg_cursor = None
    try:
        pg_cursor = pg.cursor()
        pg_cursor.execute(INSERT_SQL + ','.join(pg_cursor.mogrify("%s", (x, )) for x in data))
        pg.commit()
    except psycopg2.DatabaseError as e:
        pg.rollback()
        output("psycopg2 Error (code: %s): %s" % (e.pgcode, e.pgerror), 'ERR ')
        failed = True
        break
    except Exception as e:
        pg.rollback()
        output("Exception: %s.%s" % (str(e), get_traceback()), 'ERR ')
        failed = True
        break
    finally:
        # close the cursor on success as well as on failure
        if pg_cursor is not None:
            pg_cursor.close()
    offset += CHUNK_SIZE
    if not offset % 10000:
        output("Converting table 'compl_stat', offset: %s" % offset)
if failed:
    output("Converting table 'compl_stat': FAILED", 'ERR ')
else:
    output("Converting table 'compl_stat': DONE")
