#!/usr/bin/python2
# encoding: utf-8
# kate: space-indent on; indent-width 4; replace-tabs on;
#
from __future__ import print_function
import os, os.path, sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import re, psycopg2, pymongo
from urllib import urlopen
from time import strftime, mktime
from datetime import datetime
from traceback import format_exception

# Base directory for the log file, the credential files and the PostgreSQL
# root certificate. Raises KeyError at import time if HOME is not set.
WORKING_DIR = os.environ['HOME']
# File that writelog() appends to.
LOGFILE = "%s/logs/conv_abuses_db.log" % WORKING_DIR
# How many times cluster_hosts() queries the conductor API before giving up.
RETRY_COUNT = 3
# Number of MongoDB documents read (and inserted) per main-loop iteration;
# also the step by which the read offset advances.
CHUNK_SIZE = 10000
# MongoDB settings. getMongoDBCredentials() overwrites 'user' and adds
# 'passwd' from the credentials file.
MONGO = {
    # Conductor group name that cluster_hosts() resolves to a host list.
    'cluster': 'so_db',
    'port':    27017,
    # NOTE(review): not read by any code visible in this file;
    # mongo_conn_str() uses its own hard-coded fallback hosts.
    'hosts':   'db1j.so.yandex.net,db1m.so.yandex.net,db1o.so.yandex.net',
    'db':      'cmpl',
    'user':    'cmpl'
}
# PostgreSQL settings. getPGCredentials() may overwrite 'host', 'port' and
# 'user' and adds 'password' from the matching .pgpass line.
PG = {
    # Comma-separated host list; getPGdb() either hands it to libpq as is or
    # probes the hosts one by one.
    "host":    "iva-xb3w4z7jjjpuf8sd.db.yandex.net,myt-3mdnt0clvntgeqmv.db.yandex.net,sas-5nnjcnus304urap7.db.yandex.net",
    "port":    6432,
    "db":      "complaints",
    "user":    "cmpl"
}
def get_traceback():
    """Return the exception currently being handled as tab-indented text.

    Each chunk produced by traceback.format_exception() is stripped of
    surrounding whitespace, prefixed with a tab and terminated with a
    newline, so the result can be appended directly to a log message.

    Returns:
        The formatted traceback as a single string.
    """
    exc_type, exc_value, exc_traceback = sys.exc_info()
    tb = ''
    for step in format_exception(exc_type, exc_value, exc_traceback):
        try:
            tb += "\t" + step.strip() + "\n"
        except Exception:
            # Best effort: a chunk that cannot be appended (e.g. an encoding
            # error while concatenating) is skipped instead of losing the
            # whole traceback. Only Exception is caught so KeyboardInterrupt
            # and SystemExit still propagate.
            continue
    return tb

def writelog(msg, isTB = False):
    """Append a timestamped message to LOGFILE.

    Args:
        msg: Text to log; nothing is written when it is empty or None.
        isTB: When true, the traceback of the exception currently being
            handled is appended right after ``msg`` (with no newline in
            between); otherwise a single newline is appended.

    Any failure while formatting or writing is reported on stderr and is
    never raised to the caller.
    """
    if not msg:
        return
    try:
        tb = get_traceback() if isTB else "\n"
        # The context manager closes the file even if write() fails.
        with open(LOGFILE, 'a') as f:
            f.write(strftime("[%Y-%m-%d %H:%M:%S]: ") + msg + tb)
    except Exception as e:
        print("Writelog error: %s" % str(e), file = sys.stderr)

def output(message, message_type = "INFO", end = "\n"):
    """Print a timestamped, typed message to stdout and flush immediately.

    Args:
        message: Text to print.
        message_type: Tag printed between the timestamp and the text.
        end: Line terminator handed to print().
    """
    stamp = strftime("%Y-%m-%d %H:%M:%S")
    line = "[%s] %s %s" % (stamp, message_type, message)
    print(line, end=end)
    # Flush so progress is visible even when stdout is buffered.
    sys.stdout.flush()

def doRequest(url, prompt):
    """Fetch ``url`` and return the response body, or "" on any failure.

    Args:
        url: Address to open with urlopen().
        prompt: Prefix used in log messages to identify the request.

    Returns:
        The response body when the HTTP status is 200, otherwise an empty
        string. A non-200 status and any exception are written to the log
        and never raised.
    """
    try:
        f = urlopen(url)
        try:
            code = f.getcode()
            if code == 200:
                return f.read()
            # f.info() holds the response headers; the "body" label in the
            # message is kept as is for log compatibility.
            writelog('{0} response HTTP code: {1}, body: {2}'.format(prompt, code, f.info()))
        finally:
            # Always release the underlying socket.
            f.close()
    except Exception as e:
        writelog('%s HTTP request failed: %s.' % (prompt, str(e)), True)
    return ""

def cluster_hosts(conductor_group, default_hosts = None):
    """Resolve a conductor group to its list of host names.

    The conductor API is queried up to RETRY_COUNT times; the first
    non-empty response is split into lines, and blank lines are dropped.

    Args:
        conductor_group: Group name; when falsy no request is made.
        default_hosts: Hosts to return when the group cannot be resolved.
            Defaults to a fresh empty list on every call.

    Returns:
        The resolved host names, or ``default_hosts`` when the group is
        empty, every request failed, or the response contained no hosts.
    """
    if default_hosts is None:
        # A new list per call: a shared mutable default would leak
        # modifications made by one caller into later calls.
        default_hosts = []
    if conductor_group:
        for _ in range(RETRY_COUNT):
            r = doRequest("https://c.yandex-team.ru/api-cached/groups2hosts/%s" % conductor_group, "Get DB cluster hosts for group %s" % conductor_group)
            if not r:
                continue
            # Skip blank lines so they do not turn into empty host names.
            hosts = [h for h in (line.strip() for line in r.splitlines()) if h]
            return hosts if hosts else default_hosts
    return default_hosts

def mongo_conn_str(cfg):
    """Build the ``mongodb://`` connection URI for ``cfg``.

    Args:
        cfg: Settings dict; reads 'cluster' and 'db', plus 'user' and
            'passwd' when 'user' is present (a missing 'passwd' raises
            KeyError).

    Returns:
        A URI of the form ``mongodb://[user:passwd@]host1,host2,.../db``,
        with hosts taken from cluster_hosts() for cfg['cluster'].
    """
    credentials = ""
    if 'user' in cfg:
        credentials = "{0}:{1}@".format(cfg['user'], cfg['passwd'])
    # Hosts used when the conductor group cannot be resolved.
    fallback_hosts = ["db1%s.so.yandex.net" % dc for dc in "hmj"]
    host_list = ','.join(cluster_hosts(cfg['cluster'], fallback_hosts))
    return "mongodb://%s%s/%s" % (credentials, host_list, cfg['db'])

def getMongoDBCredentials(cfg):
    """Load MongoDB user and password from a ``.mongodb.<db>`` file into ``cfg``.

    The file is looked up in WORKING_DIR first and next to this script
    otherwise. The first line made of exactly two colon-separated fields
    sets cfg['user'] and cfg['passwd'] (the password is stripped of
    surrounding whitespace).

    Args:
        cfg: Settings dict; reads 'db', updated in place.

    Errors are written to the log and never raised; ``cfg`` is left
    untouched when no matching line is found.
    """
    try:
        path = '%s/.mongodb.%s' % (WORKING_DIR, cfg['db'])
        if not os.path.exists(path):
            path = '%s/.mongodb.%s' % (os.path.dirname(os.path.abspath(__file__)), cfg['db'])
        # The context manager closes the file on every path, errors included.
        with open(path) as f:
            for line in f:
                sf = line.split(':')
                if len(sf) == 2:
                    cfg['user'], cfg['passwd'] = sf[0], sf[1].strip()
                    break
    except Exception as e:
        writelog("getCredentials exception: %s" % str(e), True)

def getPGCredentials(cfg):
    """Load PostgreSQL connection settings from a ``.pgpass.<name>`` file into ``cfg``.

    ``<name>`` is cfg['db'] with a trailing "db" removed, if present. The
    file is looked up in WORKING_DIR first and next to this script
    otherwise. The first line with five colon-separated fields whose third
    field equals cfg['db'] sets cfg['host'], cfg['port'] (as int),
    cfg['user'] and cfg['password'].

    Args:
        cfg: Settings dict; reads 'db', updated in place.

    Errors (missing file, non-numeric port, ...) are written to the log
    and never raised.
    """
    dbname = cfg['db'][:-2] if cfg['db'].endswith('db') else cfg['db']
    try:
        path = '{0}/.pgpass.{1}'.format(WORKING_DIR, dbname)
        if not os.path.exists(path):
            path = '{0}/.pgpass.{1}'.format(os.path.dirname(os.path.abspath(__file__)), dbname)
        # The context manager closes the file on every path, errors included.
        with open(path) as f:
            for line in f:
                sf = line.split(':')
                if len(sf) == 5 and sf[2] == cfg['db']:
                    cfg['host'], cfg['port'], cfg['user'], cfg['password'] = sf[0], int(sf[1]), sf[3], sf[4].strip()
                    break
    except Exception as e:
        writelog("getPGCredentials exception: %s" % str(e), True)

def getPGdb(cfg):
    """Return a cached PostgreSQL connection for ``cfg``, reconnecting if needed.

    The connection is stored as the function attribute ``<db>_connection``
    and is re-created when it is missing or reports itself as closed.

    Connection strategy:
      * psycopg2 exposing ``__libpq_version__``: cfg['host'] is passed to
        libpq unchanged (including a comma-separated list).
        NOTE(review): this branch does not check that the chosen host is
        writable - confirm whether ``target_session_attrs`` is needed.
      * otherwise, with several hosts: each host is tried in order and the
        first one where ``pg_is_in_recovery()`` is false is kept. Failures
        are logged and the next host is tried.
      * otherwise: a single direct connection.

    Args:
        cfg: Settings dict; reads 'db', 'user', 'password', 'host', 'port'.

    Returns:
        The cached psycopg2 connection. If no host qualified, nothing new
        is cached: the first call raises AttributeError, later calls
        return the previous (closed) connection.
    """
    attr = "%s_connection" % cfg['db']
    cached = getattr(getPGdb, attr, None)
    if cached is None or getattr(cached, 'closed', False):
        common = dict(
            user = cfg['user'],
            password = cfg['password'],
            port = cfg['port'],
            sslmode = 'verify-full',
            sslrootcert = '%s/.pgsql/root.crt' % WORKING_DIR,
        )
        if hasattr(psycopg2, '__libpq_version__'):
            setattr(getPGdb, attr, psycopg2.connect(dbname = cfg['db'], host = cfg['host'], **common))
        elif ',' in cfg['host']:
            for host in cfg['host'].split(','):
                candidate = None
                try:
                    candidate = psycopg2.connect(database = cfg['db'], host = host, **common)
                    pg_cursor = candidate.cursor()
                    pg_cursor.execute("SELECT pg_is_in_recovery()")
                    res = pg_cursor.fetchone()
                    pg_cursor.close()
                    if res and not res[0]:
                        setattr(getPGdb, attr, candidate)
                        # Ownership moved to the cache; do not close below.
                        candidate = None
                        break
                except Exception as e:
                    writelog("getPGdb exception: %s" % str(e), True)
                    output("getPGdb exception: %s" % str(e))
                finally:
                    # Replicas and half-initialised connections are released
                    # instead of being left open for the rest of the run.
                    if candidate is not None:
                        try:
                            candidate.close()
                        except Exception:
                            pass
        else:
            setattr(getPGdb, attr, psycopg2.connect(database = cfg['db'], host = cfg['host'], **common))
    return getattr(getPGdb, attr)

def getMongoDB(cfg):
    """Return the cached pymongo database handle for ``cfg``, creating it once.

    The handle is stored as the function attribute ``<db>_connection``.

    Args:
        cfg: Settings dict; reads 'db' and 'port' here, and whatever
            mongo_conn_str() reads.

    Returns:
        The database object ``MongoClient(...)[cfg['db']]``.
    """
    cache_key = "%s_connection" % cfg['db']
    if hasattr(getMongoDB, cache_key):
        return getattr(getMongoDB, cache_key)
    client = pymongo.MongoClient(
        host = mongo_conn_str(cfg),
        port = cfg['port'],
        connectTimeoutMS = 60000,
        socketTimeoutMS = 60000,
    )
    database = client[cfg['db']]
    setattr(getMongoDB, cache_key, database)
    return database

# Maximum consecutive attempts for one chunk, separately for the MongoDB read
# and for the PostgreSQL insert.
_MAX_ATTEMPTS = 10


def _abuse_row(r):
    """Convert one MongoDB 'abuses' document into the tuple inserted into PostgreSQL.

    Missing or falsy 'source'/'stid' become '-' / ""; missing or falsy
    numeric fields become 0; a missing 'move' or 'foo' becomes -1. Values
    that int() cannot convert raise, which the caller treats as a read
    failure.

    Field order: source, stid, uid, suid, move, foo, cnt, cmpldata, data.
    """
    return (
        r.get("source") or '-',
        r.get("stid") or "",
        int(r.get("uid") or 0),
        int(r.get("suid") or 0),
        int(r.get("move", -1)),
        int(r.get("foo", -1)),
        int(r.get("cnt") or 0),
        int(r.get("cmpldata") or 0),
        int(r.get("data") or 0),
    )


# --- Script body: copy 'abuses' from MongoDB to PostgreSQL in chunks. ---
getMongoDBCredentials(MONGO)
mongo = getMongoDB(MONGO)
getPGCredentials(PG)
pg = getPGdb(PG)
offset = attempt = pg_attempt = 0
try:
    # Optional first argument: earliest complaint date (YYYY-MM-DD).
    min_date = sys.argv[1] if len(sys.argv) > 1 else '2018-01-12'
    min_cmpl_time = int(mktime(datetime.strptime(min_date, '%Y-%m-%d').timetuple()))
except Exception as e:
    output("Exception: %s" % str(e))
    sys.exit(1)
output("Converting table 'abuses': STARTED")
# NOTE(review): the loop only ends on a successful read that returns no
# documents. If MongoDB stays unavailable, each chunk is skipped after
# _MAX_ATTEMPTS failures and the loop never terminates.
# NOTE(review): the query pages with skip/limit and no explicit sort, so the
# document order between pages is not guaranteed - confirm this is acceptable.
while True:
    try:
        data = []
        try:
            documents = mongo['abuses'].find({'cmpldata': {'$gte': min_cmpl_time}}, skip=offset, limit=CHUNK_SIZE)
            data = [_abuse_row(r) for r in documents]
        except Exception as e:
            output("MongoDB exception: %s.%s" % (str(e), get_traceback()), 'ERR ')
            attempt += 1
            if attempt < _MAX_ATTEMPTS:
                output("Rereading of records from position %s (attempt %s)" % (offset, attempt))
                continue
            # Retries exhausted: the chunk is skipped; say so explicitly.
            output("Records from position %s could not be read after %s attempts, chunk SKIPPED" % (offset, attempt), 'ERR ')
        if attempt < _MAX_ATTEMPTS:
            if len(data) < 1:
                break
            while pg_attempt < _MAX_ATTEMPTS:
                try:
                    pg_cursor = pg.cursor()
                    pg_cursor.execute("INSERT INTO abuses (source, stid, uid, suid, move, foo, cnt, cmpldata, data) VALUES %s ON CONFLICT DO NOTHING" % ','.join(pg_cursor.mogrify("%s", (x, )) for x in data))
                    pg.commit()
                    break
                except Exception as e:
                    try:
                        pg.rollback()
                        pg_cursor.close()
                    except Exception:
                        # Cleanup is best effort; the connection may already
                        # be unusable, or the cursor may never have existed.
                        pass
                    pg_attempt += 1
                    if hasattr(e, 'pgcode') and hasattr(e, 'pgerror'):
                        output("psycopg2 Error (code: %s): %s" % (e.pgcode, e.pgerror), 'ERR ')
                    else:
                        output("Exception: %s.%s" % (str(e), get_traceback()), 'ERR ')
                    output("Attempt #%s to save data to PGaaS" % pg_attempt)
                    pg = getPGdb(PG)
            else:
                # Reached only when the retries ran out without a successful
                # commit: the chunk is dropped; say so explicitly.
                output("Records from position %s could not be saved after %s attempts, chunk DROPPED" % (offset, pg_attempt), 'ERR ')
        attempt = pg_attempt = 0
    except Exception as e:
        output("DB exception: %s.%s" % (str(e), get_traceback()), 'ERR ')
        continue
    offset += CHUNK_SIZE
    if not offset % 100000:
        output("Converting table 'abuses', offset: %s" % offset, end='\r')
output("Converting table 'abuses': DONE")
