#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Script for backing up and monitoring the Direct moderation MongoDB that lives on ZFS.
It is easy to port to any other MongoDB living on ZFS, and slightly harder to port to another service on ZFS.
It takes a single optional parameter 'mon': with it the script runs as a monitoring check, otherwise it performs a backup.
The code lives in direct-utils.
"""

import os
import subprocess
from subprocess import Popen, PIPE
import sys 
#import date
import datetime
import pymongo
from pymongo.read_preferences import ReadPreference
import yaml
from socket import getaddrinfo, getfqdn, AF_INET, AF_INET6
import time
from time import sleep
import shutil
import logging

# Default for the "stop the service before snapshotting" switch; the __main__
# block overrides it with the 'do_shutdown' key of the YAML config.
do_shutdown = False

def die(msg, exit_code=1):
    """Log *msg* at ERROR level and terminate the process with *exit_code*."""
    # TODO: rollback changes, set status=aborted
    logging.error(msg)
    raise SystemExit(exit_code)

def monthdelta(date, delta):
    """Return *date* shifted by *delta* calendar months.

    *delta* may be negative. The day of month is clamped to the last day of
    the target month (e.g. 31 March minus one month gives 28/29 February).

    :param date: a ``datetime.date`` (or ``datetime.datetime``) instance
    :param delta: number of months to add
    :return: a new object of the same type as *date*
    """
    import calendar  # local import: only this helper needs it

    # Count months from year 0 with a zero-based month index so that floor
    # division handles negative deltas and year boundaries uniformly.
    y, month_index = divmod(date.year * 12 + (date.month - 1) + delta, 12)
    m = month_index + 1
    # monthrange() applies the full Gregorian leap-year rule (century years
    # are leap only when divisible by 400).
    d = min(date.day, calendar.monthrange(y, m)[1])
    return date.replace(year=y, month=m, day=d)

def getpools():
    """Return the pool names printed by ``zpool list -Ho name``.

    Terminates the script through die() when the command prints nothing.
    """
    proc = Popen("zpool list -Ho name", shell=True, stdout=PIPE)
    stdout_data, _ = proc.communicate()
    pool_names = stdout_data.splitlines()
    if pool_names == []:
        die("Can't get zfs pools")
    else:
        return pool_names

def getzfs():
    """Return the dataset names printed by ``zfs list -Ho name``.

    Terminates the script through die() when the command prints nothing.
    """
    proc = Popen("zfs list -Ho name", shell=True, stdout=PIPE)
    stdout_data, _ = proc.communicate()
    dataset_names = stdout_data.splitlines()
    if dataset_names == []:
        die("Can't get zfs")
    else:
        return dataset_names

def do_snapshot(pool, rsname, need_shutdown):
    """Create the ZFS snapshot ``<pool>@<strdatenow>``.

    Reads the module globals ``service`` and ``strdatenow``.

    :param pool: ZFS dataset to snapshot
    :param rsname: replica set name; the init service is ``<service>.<rsname>``
    :param need_shutdown: when true, stop the service before the snapshot and
        start it again afterwards; nothing is done if its pidfile is absent

    Failures are logged rather than raised, so the caller can carry on with
    the remaining datasets.
    """
    snapshot_name = "@".join([pool, strdatenow])

    if not need_shutdown:
        try:
            # check_call (unlike call) raises on a non-zero exit status.
            subprocess.check_call(["zfs", "snapshot", snapshot_name])
        except (subprocess.CalledProcessError, OSError) as e:
            logging.error(':'.join([rsname, str(e)]))
        return

    service_name = service + "." + rsname
    pidfile = "/var/run/" + service_name + "/mongodb.pid"
    if not os.path.isfile(pidfile):
        logging.info("%s.%s pidfile not found" % (service, pool))
        return

    try:
        subprocess.check_call(["service", service_name, "stop"])
        print(snapshot_name)
        subprocess.check_call(["zfs", "snapshot", snapshot_name])
    except (subprocess.CalledProcessError, OSError) as e:
        # Not fatal on purpose: exiting here would leave the service stopped.
        logging.error("snapshot of %s failed: %s" % (pool, e))
    else:
        logging.info("successfully create snapshot for " + pool)
    finally:
        # Always try to bring the service back, whatever happened above.
        try:
            subprocess.check_call(["service", service_name, "start"])
        except (subprocess.CalledProcessError, OSError) as e:
            print(e)
            logging.error("I CAN'T START %s, FIX AND START MANUALLY. SORRY..." % service)


def fqdn_to_ip_url(hostname):
    """Resolve *hostname* to an IP literal usable in a host:port address.

    An IPv4 address is returned as is; an address of any other family is
    wrapped in square brackets.

    :raises ValueError: if fqdn_to_ip() cannot resolve *hostname*
    """
    resolved = fqdn_to_ip(hostname)
    if resolved is None:
        # Fail with a readable message instead of a tuple-unpacking TypeError.
        raise ValueError("cannot resolve hostname %r" % (hostname,))
    ip, family = resolved
    if family == AF_INET:
        return ip
    return '[' + ip + ']'

def fqdn_to_ip(hostname):
    """Resolve *hostname* to an ``(ip, family)`` tuple.

    IPv6 is tried first, then IPv4.

    :return: ``(address_string, AF_INET6 or AF_INET)``, or None when the name
        resolves in neither family
    """
    # socket.gaierror and socket.herror both derive from socket.error.
    from socket import error as socket_error

    # ipv6 addresses are preferred
    for family in (AF_INET6, AF_INET):
        try:
            return (getaddrinfo(hostname, None, family)[0][4][0], family)
        except (socket_error, UnicodeError, IndexError):
            # Resolution failed for this family; try the next one. Unlike a
            # bare except this lets KeyboardInterrupt/SystemExit through.
            continue
    return None

def get_mongo_connect(config, rsname, db, **kwargs):
    """Open an authenticated MongoClient for the shard ``<db>.<rsname>``.

    :param config: parsed configuration; ``config["shards"]["<db>.<rsname>"]``
        must provide ``fqdn``, ``port``, ``user`` and ``password``
    :param rsname: replica set name (second half of the shard key)
    :param db: database name (first half of the shard key)
    :param kwargs: extra keyword arguments passed through to MongoClient
    :return: the connected client, or None if the shard is not configured,
        its host name cannot be resolved, or connecting/authenticating fails
    """
    shard_name = db + "." + rsname
    shard = config["shards"].get(shard_name)
    if not shard:
        logging.error("Config for shard %s.%s not found. DIE" % (db,rsname))
        return None

    fqdn = shard["fqdn"]
    port = shard["port"]
    user = shard["user"]
    password = shard["password"]

    try:
        ip = fqdn_to_ip_url(fqdn)
    except (TypeError, ValueError) as e:
        # An unresolvable host must only skip this shard, not abort the run.
        logging.error('failed to resolve %s: %s' % (fqdn, str(e)))
        return None

    try:
        logging.info('connecting to mongo server %s:%s, ip: %s, kwargs: %s' % (fqdn, str(port), ip, str(kwargs)))
        mongo_client = pymongo.MongoClient(ip, port, replicaset=shard_name, **kwargs)
        mongo_client.admin.authenticate(user, password)
    except Exception as e:
        logging.error('failed while connecting: %s' % (str(e),))
        return None

    logging.info('connecting to mongo server was successfully')
    return mongo_client

def check_or_backup_config_server(config, action):
    """Back up, or check the backups of, every configured config server.

    :param config: parsed configuration; reads ``config_servers``,
        ``backup_dest_dir`` and, for backups, ``mongodump_options``.
        For ``'backup'`` the located binary is stored in
        ``config['mongodump_path']``.
    :param action: ``'backup'`` runs mongodump for each config server;
        ``'monitoring'`` records a ``[ok, message]`` pair per server in the
        module-level ``mon`` dict. Any other value does nothing.
    """
    if action == 'backup':
        logging.info("trying to backup one of config server")
        p = Popen('which mongodump', shell=True, stdout=PIPE)
        which_output, _ = p.communicate()
        config['mongodump_path'] = which_output.replace('\n', '')
        if p.returncode != 0:
            die('mongodump not found in PATH')

    for cfgsrv_opts in config['config_servers'].values():
        if action == 'backup':
            _backup_config_server(config, cfgsrv_opts)
        elif action == 'monitoring':
            _check_config_server_backup(config, cfgsrv_opts)


def _backup_config_server(config, cfgsrv_opts):
    """Dump one config server with mongodump into today's backup directory."""
    fqdn = cfgsrv_opts['fqdn']
    resolved = fqdn_to_ip(fqdn)
    if resolved is None:
        # Skip this server instead of crashing on tuple-unpacking None.
        logging.critical("can't resolve config server %s, backup skipped" % fqdn)
        return
    ip = resolved[0]

    mongodump_args = [config['mongodump_path']] + list(config['mongodump_options'])
    mongodump_args += ['--host', ip, '--port', cfgsrv_opts['port']]
    if cfgsrv_opts.get('user'):
        mongodump_args += ['--username', cfgsrv_opts['user']]
    if cfgsrv_opts.get('password'):
        # NOTE(review): the password ends up in argv, where other local users
        # can see it in the process list.
        mongodump_args += ['-p%s' % cfgsrv_opts['password']]
    mongodump_args += ['--out', '%s/config_server/%s/%s' % (config['backup_dest_dir'], strdatenow, fqdn)]
    mongodump_args = [str(arg) for arg in mongodump_args]

    logging.info('running mongodump with args from %s' % CONFIG_FILE)
    returncode = Popen(mongodump_args).wait()
    if returncode != 0:
        logging.critical("mongodump %s process failed with return code %d" % (fqdn, returncode))
    else:
        logging.info("config server %s successfully backed up" % fqdn)


def _check_config_server_backup(config, cfgsrv_opts):
    """Record in ``mon`` whether a recent dump directory exists for one server."""
    # Mon: {fqdn: [True/False, 'status']}
    fqdn = cfgsrv_opts['fqdn']
    logging.debug("Checking backup of config server " + fqdn)
    # Start from "not found" so the entry exists even when get_days_mon()
    # yields no dates at all.
    mon[fqdn] = [False, "Backup " + fqdn + " not found"]
    for day in get_days_mon():
        check_dir = '%s/config_server/%s/%s' % (config['backup_dest_dir'], day, fqdn)
        try:
            os.path.getmtime(check_dir)
        except OSError as e:
            logging.debug(str(e))
        else:
            mon[fqdn] = [True, "Backup %s found" % fqdn]
            break
    if mon[fqdn][0]:
        logging.debug("Backup %s found" % fqdn)
    else:
        logging.debug("Backups for the %s days not found" % str(daystowaitbackup))


def delete_backup_cfgsrv(config, dateold):
    """Remove config-server dump directories dated on or before *dateold*.

    Looks at the entries of ``<backup_dest_dir>/config_server/`` whose names
    parse as ``YYYYMMDD``; entries with any other name are left untouched.

    :param config: parsed configuration; reads ``backup_dest_dir``
    :param dateold: ``datetime.date`` cut-off (inclusive)
    """
    logging.info("remove old backup of config_server")
    backup_root = config['backup_dest_dir'] + "/config_server/"
    try:
        backup_dirs = os.listdir(backup_root)
    except OSError as e:
        # Nothing has been dumped yet (or the directory is unreadable).
        logging.info("I can't list backups in %s - %s" % (backup_root, e))
        return

    for backup_dir in backup_dirs:
        try:
            backup_date = datetime.datetime.strptime(backup_dir, "%Y%m%d").date()
        except ValueError:
            # A stray, non-date entry must not abort the whole cleanup.
            logging.info("skip %s: name is not a backup date" % (backup_root + backup_dir))
            continue
        if backup_date > dateold:
            continue
        olddir = backup_root + backup_dir
        try:
            shutil.rmtree(olddir)
        except Exception as e:
            logging.info("I can't remove old backup from %s - %s" % (olddir, e))
        else:
            logging.info("successfully remove old backup for date " + backup_dir)

def get_snapshot_list(fs):
    """Return the snapshot names ``zfs list -t snapshot -r <fs>`` prints.

    :param fs: dataset name; passed as a single argument without a shell, so
        spaces or shell metacharacters in it are not interpreted
    :return: list of output lines, empty when the command prints nothing or
        cannot be started
    """
    try:
        p = Popen(['zfs', 'list', '-t', 'snapshot', '-r', fs, '-Ho', 'name'], stdout=PIPE)
    except OSError as e:
        logging.error("can't run zfs list for %s: %s" % (fs, e))
        return []
    return p.communicate()[0].splitlines()


def delete_old_snapshot(pool, dateold):
    """Destroy every snapshot under *pool* that is older than *dateold*.

    A snapshot's age comes from its name when the part after ``@`` parses as
    ``YYYYMMDD``; otherwise from its ZFS ``creation`` property. Snapshots
    whose age cannot be determined are left alone.

    :param pool: dataset whose snapshots (listed recursively) are examined
    :param dateold: ``datetime.date`` cut-off (exclusive)
    """
    for snapshot in get_snapshot_list(pool):
        snapshot_name = snapshot.split('@')[-1]
        try:
            snapshot_date = datetime.datetime.strptime(snapshot_name, "%Y%m%d")
        except ValueError:
            snapshot_date = _snapshot_creation_date(snapshot)
            if snapshot_date is None:
                continue
        if snapshot_date.date() < dateold:
            logging.info("destroying old snapshot " + snapshot)
            try:
                # Destroy the snapshot that was actually found to be old;
                # check_call raises on a non-zero exit status.
                subprocess.check_call(["zfs", "destroy", snapshot])
            except (subprocess.CalledProcessError, OSError) as e:
                logging.info('Cannot del old snapshot in pool %s' % pool)


def _snapshot_creation_date(snapshot):
    """Return the ``creation`` property of *snapshot* as a datetime, or None."""
    try:
        p = Popen(['zfs', 'get', 'creation', '-Ho', 'value', snapshot], stdout=PIPE)
        creation = p.communicate()[0].splitlines()[0]
        return datetime.datetime.strptime(creation, "%a %b %d %H:%M %Y")
    except (OSError, IndexError, ValueError) as e:
        logging.info("can't get creation date of %s: %s" % (snapshot, e))
        return None

def check_snapshot_backup(pool):
    """Check that *pool* has a snapshot named after one of the monitored days.

    :return: ``[ok, message]`` where *ok* is True when at least one snapshot
        ``<pool>@<YYYYMMDD>`` for a day from get_days_mon() exists
    """
    logging.debug("Checking snapshot of pool " + pool)
    expected = ['%s@%s' % (pool, day) for day in get_days_mon()]
    existing = get_snapshot_list(pool)
    if existing == []:
        logging.debug('Snapshots for pool %s not found' % pool)
        return [False, 'snapshots for pool %s not found' % pool]

    # Newest first: the names differ only in their YYYYMMDD suffix.
    matched = sorted(set(expected).intersection(existing), reverse=True)
    if not matched:
        logging.debug("No snapshots found for fs %s in %s last days" % (pool,str(daystowaitbackup)))
        return [False, 'no snapshots found for fs %s in %s last days' % (pool,str(daystowaitbackup)) ]

    logging.debug("Found: " + ' '.join(matched))
    latest = 'last snapshot - %s' % str(matched[0])
    logging.debug(latest)
    return [True, latest]

def get_days_mon():
    """Return the last ``daystowaitbackup`` days as ``YYYYMMDD`` strings.

    The list starts with ``datenow`` and goes backwards one day at a time;
    both values are module globals.
    """
    return [
        (datenow - datetime.timedelta(days=offset)).strftime('%Y%m%d')
        for offset in range(int(daystowaitbackup))
    ]


if __name__ == '__main__':
    # One log file per calendar day.
    logfile = '/var/log/mongo-backup/log.%s' % str(datetime.datetime.now().date())
    logging.basicConfig(
                        format = '%(levelname)-8s [%(asctime)s] %(message)s',
                        level = logging.DEBUG,
                        filename = logfile
                        )

    # 'mon' as the first argument selects monitoring; anything else is a backup.
    if len(sys.argv) > 1 and sys.argv[1] == 'mon':
        logging.debug(5 * "---" + "Start as monitoring" + 5 * "---")
        act = 'monitoring'
    else:
        logging.info(5 * "---" + "Start as backuper" + 5 * "---")
        act = 'backup'

    CONFIG_FILE = '/etc/yandex-du-mongozfs-backup/moddb.yml'
    my_fqdn = getfqdn()
    service = 'mongodb-moddb'
    db = 'moddb'

    # Monitoring results: {name: [ok, message]}.
    mon = {}
    with open(CONFIG_FILE, 'r') as cfg:
        config = yaml.safe_load(cfg)
    # safe_load() returns None for an empty document, so test truthiness
    # instead of comparing with {}.
    if not config:
        die("Can't load config from %s" % CONFIG_FILE)
    # Timeouts are multiplied by 1000 to get the *MS options MongoClient takes.
    connect_args = { 'read_preference': ReadPreference.PRIMARY,
                'socketTimeoutMS': config['socket_timeout'] * 1000,
                'connectTimeoutMS': config['connect_timeout'] * 1000,
                'serverSelectionTimeoutMS': config['server_selection_timeout'] * 1000,
                'socketKeepAlive': True }
    # maxage is used as a number of months (see the monthdelta() calls below).
    maxage = config.get('maxage')
    daystowaitbackup = config.get('daystowaitbackup')
    do_shutdown = config.get('do_shutdown', False)
    datenow = datetime.datetime.now().date()
    strdatenow = datenow.strftime('%Y%m%d')
    strdateold = monthdelta(datenow, -maxage).strftime('%Y%m%d')
    pools = getpools()
    allzfs = getzfs()
    # Work on the datasets that are not pools themselves; when there are
    # none, fall back to the pools.
    filesystems = [item for item in allzfs if item not in pools]
    if filesystems == []:
        filesystems = pools

    for pool in filesystems:
        # The last path component of the dataset is used as the replica set name.
        rsname = pool.split('/')[-1]
        if act == 'monitoring':
            mon[pool] = check_snapshot_backup(pool)
            continue
        logging.info("Backup %s.%s \n" % (service, rsname))
        m_client = get_mongo_connect(config, rsname, db, **connect_args)
        if m_client is None:
            continue
        members = m_client.admin.command('replSetGetStatus')['members']
        for member in members:
            # Only the replica set member running on this host is snapshotted.
            if member.get('name').split(':')[0] == my_fqdn:
                # NOTE(review): assumes member['optime'] has as_datetime()
                # returning UTC time - confirm for the MongoDB version in use.
                my_timestamp = time.mktime(member.get('optime').as_datetime().timetuple())
                # Snapshot only when the local optime is recent enough.
                if int(time.mktime(time.gmtime())-my_timestamp) < config['max_replica_delay']:
                    do_snapshot(pool, rsname, do_shutdown)
                else:
                    print('Replica position so far from master')
        delete_old_snapshot(pool, monthdelta(datenow, -maxage))


    check_or_backup_config_server(config, act)
    if act == 'monitoring':
        # Output is "<status>;<message>": 0 when every check passed,
        # otherwise 2 followed by the messages of the failed checks.
        if mon != {} and all( item[0] for item in mon.values() ):
            print("0;OK")
        else:
            print("2;" + ','.join( list(item[1] for item in mon.values() if not item[0]) ))
    else:
        # Config-server dumps are kept twice as long as the snapshots.
        delete_backup_cfgsrv(config, monthdelta(datenow, -2 * maxage))
