#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, unicode_literals

import argparse
import collections
import logging
import operator
import os
import re
import socket
import subprocess
import sys
import time
import json
import requests
from ConfigParser import SafeConfigParser

import psycopg2
from psycopg2.extras import RealDictCursor


def getPortoConnection():
    """Import the porto bindings on demand and return a connected client.

    The ``porto`` module is imported lazily, so python-portopy is only
    needed when porto is actually requested. It is bound as a module-level
    global because other code in this file refers to ``porto.exceptions``
    after calling this function.

    Returns:
        A ``porto.Connection`` on which ``connect()`` has already been called.

    Exits the process with status 1 if the module cannot be imported.
    """
    try:
        global porto
        import porto
    except ImportError:
        # Look the logger up by name rather than relying on a module-level
        # ``log`` variable, which only exists when the file runs as a script.
        # 'main' is the logger name the script entry point uses.
        logging.getLogger('main').error(
            'Porto requested but python-portopy is not installed.')
        sys.exit(1)

    conn = porto.Connection()
    conn.connect()
    return conn


def get_free_space():
    """Return the free space, in bytes, of the filesystem holding PGDATA.

    The data directory is asked from the local server (``SHOW
    data_directory``) and then inspected with ``os.statvfs``. The result is
    ``f_bavail * f_frsize`` of that filesystem.
    """
    conn = psycopg2.connect('user=postgres dbname=postgres')
    try:
        conn.autocommit = True
        cur = conn.cursor()
        cur.execute('SHOW data_directory')
        pgdata = cur.fetchone()[0]
    finally:
        # Close explicitly instead of relying on garbage collection; this
        # function is called once per object that is about to be repacked.
        conn.close()

    stat = os.statvfs(pgdata)
    return stat.f_bavail * stat.f_frsize


def load_query(bloat_type):
    """Return the text of the SQL file that estimates the given bloat type.

    ``bloat_type`` must be ``'table_bloat'`` or ``'index_bloat'``; any other
    value raises ``KeyError``. The files are read from
    ``/usr/local/yandex/sqls``.
    """
    sql_dir = '/usr/local/yandex/sqls'
    templates = {
        'table_bloat': '{}/table_bloat.sql',
        'index_bloat': '{}/index_bloat.sql',
    }
    # Resolve the name first so an unknown type fails before any file access.
    query_path = templates[bloat_type].format(sql_dir)
    with open(query_path) as query_file:
        contents = query_file.read()
    return contents


def checkExtVersion(db, repackPath):
    """Make the pg_repack extension in ``db`` match the client binary.

    The installed extension version is read from ``pg_available_extensions``
    and compared with the second whitespace-separated token on the first
    line printed by ``<repackPath> --version``.

    * No installed version found: ``CREATE EXTENSION pg_repack``.
    * Versions differ: ``DROP EXTENSION`` followed by ``CREATE EXTENSION``.
    * Versions match: nothing is changed.

    Args:
        db: name of the database to connect to (as user ``postgres``).
        repackPath: path of the pg_repack executable.

    Raises:
        IndexError: if the ``--version`` output has fewer than two tokens.
    """
    log = logging.getLogger('ext version check ' + db)
    conn = psycopg2.connect('user=postgres dbname=' + db)
    try:
        conn.autocommit = True
        cur = conn.cursor()

        installedVersion = None
        try:
            cur.execute("SELECT installed_version "
                        "FROM pg_available_extensions "
                        "WHERE installed_version IS NOT NULL AND "
                        "name = 'pg_repack'")
            row = cur.fetchone()
            # No row simply means the extension is not installed.
            if row is not None:
                installedVersion = row[0]
        except psycopg2.Error as exc:
            # Still best effort: a failed lookup is treated like "not
            # installed", but the reason is no longer silently discarded.
            log.warning('Could not read installed pg_repack version: %s',
                        exc)

        log.debug('Running "' + repackPath + ' --version"')

        repack = subprocess.Popen(
            [repackPath, '--version'], stdout=subprocess.PIPE)
        # communicate() drains the pipe while waiting, so the child can
        # never block on a full pipe the way wait()-then-read can.
        output = repack.communicate()[0]
        if isinstance(output, bytes):
            # Compare text with text regardless of interpreter version.
            output = output.decode('utf-8', 'replace')
        first_line = output.split('\n', 1)[0]
        expectedVersion = first_line.split()[1]

        if installedVersion is None:
            log.warning('No pg_repack installed in %s' % db)
            cur.execute('CREATE EXTENSION pg_repack')
        elif expectedVersion != installedVersion:
            log.warning('Old pg_repack installed in %s' % db)
            cur.execute('DROP EXTENSION pg_repack')
            cur.execute('CREATE EXTENSION pg_repack')
    finally:
        conn.close()


def runPrep(config):
    """Prepare the host for repacking and return the databases to process.

    Steps:
      1. If the server reports ``transaction_read_only = on`` it is treated
         as a replica and an empty list is returned.
      2. Every database except ``postgres``, ``template0`` and ``template1``
         gets its pg_repack extension checked via ``checkExtVersion``.
      3. When ``main.use_porto`` is ``yes``, the ``self/index_repack`` porto
         container is found or created, its ``cpu_limit`` / ``io_limit`` are
         set from the config, and it is started if it was stopped.

    Args:
        config: config parser providing the ``main`` section.

    Returns:
        A list of database names (empty on a replica).
    """
    log = logging.getLogger('prepare')
    conn = psycopg2.connect('user=postgres dbname=postgres')
    try:
        conn.autocommit = True
        cur = conn.cursor()
        cur.execute('SHOW transaction_read_only')
        if cur.fetchone()[0] == "on":
            log.info('Replica. Nothing to do here.')
            return []

        cur.execute('SELECT datname FROM pg_database')
        filterOut = {'postgres', 'template0', 'template1'}
        # A real list: it is iterated here and then returned to the caller,
        # which a one-shot filter() iterator would not survive.
        dbs = [row[0] for row in cur.fetchall() if row[0] not in filterOut]
    finally:
        conn.close()

    for d in dbs:
        log.info('Processing ' + d)
        checkExtVersion(d, config.get('main', 'repack_path'))

    if config.get('main', 'use_porto') == 'yes':
        # Separate name so the porto client is not confused with the
        # database connection used above.
        porto_conn = getPortoConnection()
        try:
            container = porto_conn.Find('self/index_repack')
        except porto.exceptions.ContainerDoesNotExist:
            container = porto_conn.Create('self/index_repack')

        if container:
            if container.GetData('state') == 'stopped':
                container.SetProperty('isolate', False)
            container.SetProperty('cpu_limit', config.get('main', 'cpu_limit'))
            container.SetProperty('io_limit', config.get('main', 'io_limit'))
            if container.GetData('state') == 'stopped':
                container.Start()

    return dbs


def analyse_bloat_table(config, db):
    """Find tables in ``db`` whose bloat exceeds the configured thresholds.

    Runs the ``table_bloat`` query and keeps rows where ``bytes_bloat_size``
    is greater than ``main.bloat_bytes_min`` and ``bloat_ratio`` is greater
    than ``main.bloat_perc_min``.

    Args:
        config: config parser providing the ``main`` thresholds.
        db: database name to connect to (as user ``postgres``).

    Returns:
        dict mapping ``'<schemaname>.<tblname>'`` to
        ``int(total_real_size - bytes_bloat_size)``, i.e. the estimated size
        of the table once the bloat is removed.
    """
    percMin = config.getint('main', 'bloat_perc_min')
    bytesMin = config.getint('main', 'bloat_bytes_min')
    conn = psycopg2.connect('user=postgres dbname=' + db)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        cur.execute(load_query('table_bloat'))
        res = cur.fetchall()
    finally:
        # The connection is not in autocommit mode; closing it ends the
        # transaction the SELECT opened instead of leaving that to
        # garbage collection.
        conn.close()

    to_repack = {}
    for row in res:
        if row['bytes_bloat_size'] > bytesMin and row['bloat_ratio'] > percMin:
            table = '{}.{}'.format(row['schemaname'], row['tblname'])
            # table size after repack
            to_repack[table] = int(row['total_real_size'] -
                                   row['bytes_bloat_size'])
    return to_repack


def analyse_bloat_index(config, db):
    """Find indexes in ``db`` whose bloat exceeds the configured thresholds.

    Runs the ``index_bloat`` query and keeps rows where ``bloat_bytes`` is
    greater than ``main.bloat_bytes_min`` and ``bloat_pct`` is greater than
    ``main.bloat_perc_min``.

    Args:
        config: config parser providing the ``main`` thresholds.
        db: database name to connect to (as user ``postgres``).

    Returns:
        dict mapping ``'<schema_name>.<index_name>'`` to
        ``int(index_bytes - bloat_bytes)``, i.e. the estimated size of the
        index once the bloat is removed.
    """
    percMin = config.getint('main', 'bloat_perc_min')
    bytesMin = config.getint('main', 'bloat_bytes_min')
    conn = psycopg2.connect('user=postgres dbname=' + db)
    try:
        cur = conn.cursor(cursor_factory=RealDictCursor)
        cur.execute(load_query('index_bloat'))
        res = cur.fetchall()
    finally:
        # The connection is not in autocommit mode; closing it ends the
        # transaction the SELECT opened instead of leaving that to
        # garbage collection.
        conn.close()

    to_repack = {}
    for row in res:
        if row['bloat_bytes'] > bytesMin and row['bloat_pct'] > percMin:
            index = '{}.{}'.format(row['schema_name'], row['index_name'])
            # index size after repack
            to_repack[index] = int(row['index_bytes'] - row['bloat_bytes'])
    return to_repack


def skip_obj(config, obj):
    """Tell whether ``obj`` is excluded from repacking by configuration.

    ``main.except_obj_list`` is a comma-separated list of regular
    expressions; ``obj`` is skipped when ``re.search`` finds any of them in
    it. When the option is absent the pattern ``^$`` is used, which only
    matches an empty name.

    Whitespace around each list item is ignored, and empty items (trailing
    comma, empty option value) are dropped: an empty regex matches every
    string and would otherwise exclude every object.

    Args:
        config: config parser providing the ``main`` section.
        obj: object name to test, e.g. ``'schema.relation'``.

    Returns:
        True if the object must be skipped, False otherwise.
    """
    if config.has_option('main', 'except_obj_list'):
        raw_list = config.get('main', 'except_obj_list')
    else:
        raw_list = '^$'
    patterns = [item.strip() for item in raw_list.split(',')]
    return any(re.search(pattern, obj) for pattern in patterns if pattern)


def repack_table(config, db, table):
    """Build the pg_repack command line for one table and run it.

    The executable (``main.repack_path``) and the ``--wait-timeout`` value
    (``main.wait-timeout``) come from the config; the command is handed to
    ``runRepack`` and its result is returned.
    """
    logging.getLogger(table + ' repack').info('starting table repack')

    repack_bin = config.get('main', 'repack_path')
    timeout_opt = '--wait-timeout=' + config.get('main', 'wait-timeout')
    target_opt = '--table=' + table

    return runRepack(config, db, table,
                     [repack_bin, timeout_opt, target_opt, db])


def repack_index(config, db, index):
    """Build the pg_repack command line for one index and run it.

    The executable (``main.repack_path``) and the ``--wait-timeout`` value
    (``main.wait-timeout``) come from the config; the command is handed to
    ``runRepack`` and its result is returned.
    """
    logging.getLogger(index + ' repack').info('starting index repack')

    repack_bin = config.get('main', 'repack_path')
    timeout_opt = '--wait-timeout=' + config.get('main', 'wait-timeout')
    target_opt = '--index=' + index

    return runRepack(config, db, index,
                     [repack_bin, timeout_opt, target_opt, db])


def set_downtime(config, duration=24 * 3600, timeout=60):
    """Set a monitoring downtime via the juggler API.

    One filter per service listed in ``juggler.services`` (comma-separated)
    is posted as JSON to ``juggler.url`` for host ``juggler.fqdn`` and
    namespace ``juggler.namespace``, authorised with ``juggler.token``.

    Failures are not fatal: a non-200 reply or a transport error is logged
    and the function returns normally.

    Args:
        config: config parser providing the ``juggler`` section.
        duration: downtime length in seconds, counted from now.
        timeout: seconds to wait for the HTTP request before giving up.
    """
    # Same logger the script entry point uses, looked up by name so this
    # function does not depend on a module-level ``log`` variable.
    log = logging.getLogger('main')

    headers = {
        'Authorization':
            'OAuth {token}'.format(token=config.get('juggler', 'token')),
        'Content-Type':
            'application/json',
    }
    namespace = config.get('juggler', 'namespace')
    dt_services = [
        s.strip() for s in config.get('juggler', 'services').split(',')
    ]
    now = int(time.time())
    end = now + int(duration)
    fqdn = config.get('juggler', 'fqdn')
    data = json.dumps({
        'end_time': end,
        'start_time': now,
        'description': 'index repack is in progress',
        'filters': [{
            'host': fqdn,
            'namespace': namespace,
            'service': service
        } for service in dt_services]
    })
    log.debug('juggler dt json: ' + repr(data))
    try:
        resp = requests.post(
            config.get('juggler', 'url'),
            headers=headers,
            data=data,
            # NOTE(review): certificate verification is disabled while an
            # OAuth token is being sent; confirm this is intentional.
            verify=False,
            # Without a timeout an unresponsive API would block forever.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Treated like a non-200 answer: report it and carry on.
        log.error('Could not set downtime ({exc})'.format(exc=exc))
        return
    if resp.status_code != 200:
        log.error('Could not set downtime ({code}: {text})'.format(
            code=resp.status_code, text=resp.text))


def runRepack(config, db, obj, cmdline):
    """Run a pg_repack command and throttle its server-side backends.

    While the child process is alive, every 10 seconds the backends in
    ``db`` whose ``application_name`` is ``pg_repack`` are looked up in
    ``pg_stat_activity`` and moved either into the ``self/index_repack``
    porto container (``main.use_porto = yes``) or, via ``cgclassify``, into
    the cgroup named by ``main.cgroup``.

    Args:
        config: config parser providing the ``main`` section.
        db: database being repacked.
        obj: object name, used only for the logger name.
        cmdline: argument list passed to ``subprocess.Popen``.

    The outcome is only logged (based on the child's return code); nothing
    is returned.
    """
    log = logging.getLogger(obj + ' repack')
    log.info('starting repack')

    use_porto = config.get('main', 'use_porto') == 'yes'

    conn = psycopg2.connect('user=postgres dbname=postgres')
    try:
        conn.autocommit = True
        cur = conn.cursor()

        repack = subprocess.Popen(cmdline)

        # Separate name so the porto client does not replace the database
        # connection that must be closed at the end.
        porto_conn = getPortoConnection() if use_porto else None

        while repack.poll() is None:
            # The database name is passed as a bound parameter instead of
            # being spliced into the SQL text.
            cur.execute("SELECT pid FROM pg_stat_activity "
                        "WHERE application_name = 'pg_repack' AND "
                        "datname = %s", (db,))
            pids = [row[0] for row in cur.fetchall()]
            if pids:
                if use_porto:
                    for pid in pids:
                        try:
                            porto_conn.AttachProcess(
                                'self/index_repack', pid,
                                config.get('main', 'postgres_comm'))
                        except porto.exceptions.PermissionError:
                            pass  # PID is already in the subcontainer
                else:
                    subprocess.call(
                        ['cgclassify', '-g', config.get('main', 'cgroup')] +
                        [str(pid) for pid in pids])

            time.sleep(10)
    finally:
        conn.close()

    if repack.returncode != 0:
        log.error('%s repack failed' % db)
    else:
        log.info('Repack of %s db completed.' % db)


if __name__ == '__main__':
    # Script entry point: parse arguments, load the config, then for every
    # database find bloated objects of the chosen kind and repack those
    # that fit into the available disk space.
    #
    # ``config`` and ``log`` stay module-level names: code outside this
    # block may look ``log`` up as a global.
    config = SafeConfigParser()
    parser = argparse.ArgumentParser(description="""
            pg_repack smart runner
            """)

    scope = parser.add_mutually_exclusive_group(required=True)

    scope.add_argument(
        '-t',
        '--tables',
        action="store_const",
        dest="scope",
        const="table",
        help='operate on tables')
    scope.add_argument(
        '-i',
        '--indices',
        action="store_const",
        dest="scope",
        const="index",
        help='operate on tables` indices')

    parser.add_argument(
        '-c',
        '--config',
        type=str,
        required=False,
        metavar='<path>',
        default='/etc/pg_index_repack.conf',
        help='path to config')

    parser.set_defaults(scope='index')

    cmdargs = parser.parse_args()
    cmdvars = vars(cmdargs)

    # read() silently ignores files it cannot open and returns the list of
    # files it did parse; fail with a clear message instead of a later
    # "no section" error.
    if not config.read(cmdvars.get('config')):
        parser.error('cannot read config file: {}'.format(
            cmdvars.get('config')))

    # Mirror the command-line values into the [main] section.
    for option, value in cmdvars.items():
        config.set(section='main', option=option, value=value)

    logging.basicConfig(
        level=getattr(logging,
                      config.get('main', 'log_level').upper()),
        format='%(asctime)s [%(levelname)s] %(name)s:\t%(message)s')
    log = logging.getLogger('main')

    # Dispatch tables keyed by the selected scope; built once, not per db.
    analyze_func = {
        'index': analyse_bloat_index,
        'table': analyse_bloat_table
    }
    repack_func = {'index': repack_index, 'table': repack_table}

    dt_active = False

    for db in runPrep(config):
        # Fraction of the free space that may be used; ``reserv_perc``
        # percent of it is held back.
        reserv_perc = (100 - config.getfloat('main', 'reserv_perc')) / 100
        # analyse_bloat_index(...) or analyse_bloat_table(...)
        analyze_dict = analyze_func[cmdargs.scope](config, db)
        # Smallest estimated post-repack size first.
        to_repack = sorted(analyze_dict.items(), key=operator.itemgetter(1))

        if to_repack:
            if not dt_active:
                set_downtime(config)
                dt_active = True
            else:
                log.debug('dowtime already set, skipping')

        for obj, size_after in to_repack:
            # Free space changes with every repack, so re-check each time.
            free_space = int(get_free_space() * reserv_perc)
            # need 2x object size while repack
            if not skip_obj(config, obj) and free_space > size_after * 2:
                repack_func[cmdargs.scope](config, db, obj)
            else:
                log.info('Skip object [%s]' % repr(obj))

    if config.get('main', 'use_porto') == 'yes':
        # Remove the helper container once nothing runs inside it.
        conn = getPortoConnection()
        try:
            container = conn.Find('self/index_repack')
            if container.GetProperty('process_count') == '0':
                container.Destroy()
        except porto.exceptions.ContainerDoesNotExist:
            pass
