#! /usr/bin/env python
#-*- coding: utf-8 -*-

import os
import sys
import ConfigParser
from optparse import OptionParser
import re
import subprocess
import time
import logging
import logging.handlers
import shlex
import select

# Directory with check configuration files; main() parses every file in it
# and treats each section as one check.
CONF_DIR='/etc/monrun/conf.d'
# Directory for this script's state: per-check attempt files and the lock file.
TEMP_DIR = '/dev/shm'
# Name of the file inside TEMP_DIR whose presence makes main() exit without
# processing any check.
LOCK_FILE = 'restarter.lock'

# Extension of the per-check files that record restart timestamps.
EXT = 'attempts'

# Matches one line of the form "<errcode>;<description>" with an optional
# "PASSIVE-CHECK:<name>;" prefix; errcode is exactly one digit.
mon_re = re.compile('^(PASSIVE-CHECK:[a-zA-Z0-9-_.]+?;)?(?P<errcode>[0-9]);(?P<description>.*)$')
# Script-wide logger: messages of level INFO and above are sent to the local
# syslog socket, prefixed with "restarter.py:" and the level name.
logger = logging.getLogger(__name__)
LOG = logging.handlers.SysLogHandler(address = '/dev/log')
formatter = logging.Formatter('restarter.py: %(levelname)s %(message)s')
LOG.setFormatter(formatter)
logger.addHandler(LOG) 
logger.setLevel(logging.INFO)

def execute_command(cmd):
    """
    Run a command and return its captured output as a (stdout, stderr) tuple.

    cmd is either a string or a list of arguments:
      * a string is executed through the shell, so pipes, redirections and
        other shell syntax keep working;
      * a list is executed directly, without a shell, so arguments that
        contain spaces or shell metacharacters reach the program unchanged
        instead of being re-split or interpreted.

    An empty command is not executed; ("", "Empty run command!") is returned
    instead. Raises OSError if the process cannot be started (for a list this
    includes a missing or non-executable program).
    """
    if isinstance(cmd, list):
        # Keep the historical emptiness rule: a list whose space-joined form
        # is empty (no arguments, or a single empty one) is "no command".
        if not ' '.join(cmd):
            return ("", "Empty run command!")
        use_shell = False
    else:
        if len(cmd) == 0:
            return ("", "Empty run command!")
        use_shell = True

    # stdin is a pipe that communicate() closes right away, so the child
    # sees EOF on its standard input instead of inheriting ours.
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               stdin=subprocess.PIPE,
                               shell=use_shell)
    return process.communicate()

def is_status_error(check, description_pattern):
    """
    Run monrun for a check and tell whether its service should be restarted.

    check               -- name of the check passed to /usr/bin/monrun
    description_pattern -- substring that must occur in the check description
                           for a restart to be requested; an empty string
                           matches any description

    Returns True only when monrun reports error code 1 or 2, the description
    is not one of the excluded ones ("Can't parse output from script",
    "NO DATA (cache expired)") and it contains description_pattern.
    Returns False in every other case, including when monrun cannot be
    started or its output cannot be parsed.
    """
    check_command = ["/usr/bin/monrun", "-v", check, "-f", "nagios"]
    try:
        monrun_stdout, monrun_stderr = execute_command(check_command)
    except OSError as e:
        logger.error("%s: Failed execute command \"%s\", error:\n%s" % (check, check_command, str(e)))
        return False
    if monrun_stderr:
        logger.warning("%s: Monrun stderr:\n%s" % (check, monrun_stderr))

    match = mon_re.search(monrun_stdout)
    if match is None:
        # Empty or unexpected output (for example when monrun itself failed):
        # there is no status to act on, so do not restart anything.
        logger.warning('%s: Unable to parse monrun output:\n%s' % (check, monrun_stdout))
        return False

    if match.group('errcode') not in ("1", "2"):
        return False

    description = match.group('description')
    logger.info('%s: Found critical status, description:\n%s' % (check, description))
    # These descriptions are excluded from restarts.
    if description in ('Can\'t parse output from script', 'NO DATA (cache expired)'):
        logger.warning('%s: This description is found within the list of exclusions, skip restart' % (check))
        return False
    # An empty pattern is contained in every string, so it matches always.
    if description_pattern in description:
        logger.info('%s: Will be restarted' % (check))
        return True
    return False


def attempts_status(section, restart_attempt, restart_window, restart_min_interval, execution_interval):
    """
    Decide whether one more restart of a check is allowed right now.

    The restart history is the file TEMP_DIR/<section>.EXT with one unix
    timestamp per line (restart_service appends to it).

    section              -- check name; used for the history file name and
                            in log messages
    restart_attempt      -- number of recorded restarts after which no more
                            restarts are allowed until the window is over
    restart_window       -- window length in seconds, counted from the first
                            recorded restart
    restart_min_interval -- lower bound in seconds for the pause between
                            two restarts
    execution_interval   -- second lower bound for that pause; the larger of
                            the two values is the one enforced

    Returns True when the restart may proceed, False otherwise. When the
    window is over the history file is removed, which starts a new window.
    A missing history file, or one without any valid timestamp, allows the
    restart.
    """
    attempts_path = "%s/%s.%s" % (TEMP_DIR, section, EXT)
    if not os.path.isfile(attempts_path):
        return True

    # Read the history once and close the file right away.
    timestamps = []
    with open(attempts_path, "r") as attempts_file:
        for line in attempts_file:
            line = line.strip()
            if not line:
                continue
            try:
                timestamps.append(int(line))
            except ValueError:
                # A damaged line must not make every following run crash.
                logger.warning('%s: Skip malformed line in %s: %r' % (section, attempts_path, line))

    if not timestamps:
        # Nothing usable is recorded, which is the same as having no history.
        logger.warning('%s: No valid timestamps in %s, restart is allowed' % (section, attempts_path))
        return True

    now = int(time.time())
    first = timestamps[0]
    last = timestamps[-1]
    interval_sec = max(restart_min_interval, execution_interval)

    if len(timestamps) >= restart_attempt:
        # All attempts of this window are used: only a new window helps.
        if first + restart_window < now:
            logger.info('%s: Window is over. Clear restart_attempt files' % (section))
            os.remove(attempts_path)
            return True
        logger.warning('%s: Max attempts reached. Waiting windows end' % (section))
        return False

    if now - last < interval_sec:
        # Report the time left until the interval that is actually enforced
        # expires, so the logged value can never become negative.
        wait_sec = interval_sec - (now - last)
        logger.warning('%s: Exceeded number of attempts to restart in %d sec. Waiting for %d sec' % (section, interval_sec, wait_sec))
        return False

    if now - first > restart_window:
        os.remove(attempts_path)
        logger.warning('%s: Window is over. Clear restart_attempt files' % (section))
        return True

    if interval_sec * restart_attempt >= restart_window:
        # The configured attempts cannot all fit into the window; the
        # restart is still allowed, the warning only points at the config.
        logger.warning('%s: Restart window does not have enough size' % (section))
    logger.info('%s: Restart accept' % (section))
    return True


def restart_service(check, restart_command):
    """
    Record a restart attempt for the check and run its restart command.

    check           -- check name; used for the attempts file name
                       (TEMP_DIR/<check>.EXT) and in log messages
    restart_command -- one or more shell commands separated by ";"; they are
                       run one after another, each through execute_command

    The current unix time is appended to the attempts file before anything
    is executed. Output of every command is logged; a command that cannot
    be started is logged as an error and the remaining ones still run.
    """
    # Close the file before running the commands so the attempt is on disk
    # even if a command hangs or this process is killed.
    with open("%s/%s.%s" % (TEMP_DIR, check, EXT), "a") as attempts_file:
        attempts_file.write(str(int(time.time())) + '\n')

    logger.info('%s: Launch restart command "%s"' % (check, restart_command))
    for cmd in restart_command.split(";"):
        cmd = cmd.strip()
        if not cmd:
            # A trailing or doubled ";" yields an empty piece; nothing to run.
            continue
        try:
            command_stdout, command_stderr = execute_command(cmd)
        except OSError as e:
            logger.error("%s: Failed execute command \"%s\", error:\n%s" % (check, cmd, str(e)))
            continue
        logger.info('%s: Command stdout:\n%s' % (check, command_stdout))
        if command_stderr:
            logger.warning('%s: Command stderr:\n%s' % (check, command_stderr))


def _read_check_options(parser, check):
    """
    Read the restart settings of one check from its config section.

    Returns a tuple (restart_command, restart_attempt, restart_window,
    restart_min_interval, execution_interval, restart_where_description),
    or None when the check has to be skipped.
    """
    try:
        restart_command = parser.get(check, 'restart_command')
        restart_attempt = int(parser.get(check, 'restart_attempt'))
        restart_window = int(parser.get(check, 'restart_window'))
        restart_min_interval = int(parser.get(check, 'restart_min_interval'))
        execution_interval = int(parser.get(check, 'execution_interval'))
        if parser.has_option(check, 'restart_where_description'):
            restart_where_description = parser.get(check, 'restart_where_description').strip()
        else:
            restart_where_description = ""
    except ConfigParser.NoOptionError:
        # A check without the restart options is simply not managed here.
        return None
    except ValueError as e:
        # Non-numeric value: skip this check but keep processing the others.
        logger.error('%s: Invalid numeric option, skip check: %s' % (check, str(e)))
        return None
    except ConfigParser.Error as e:
        logger.error('%s: Unable to read options, skip check: %s' % (check, str(e)))
        return None
    return (restart_command, restart_attempt, restart_window,
            restart_min_interval, execution_interval, restart_where_description)


def main():
    """
    Entry point: handle the lock options or restart services of failed checks.

    -l creates the lock file TEMP_DIR/LOCK_FILE and exits with status 0.
    -u removes it and exits with status 0, or with 1 if it could not be
       removed.
    Without options the script waits 60 seconds, exits with status 0 if the
    lock file exists, and otherwise goes through every section of every
    file in CONF_DIR: a check that has all restart options, is in error
    state and is within its restart limits gets its restart command run.
    """
    p = OptionParser()
    p.add_option('-l', action='store_true', dest='lock', help='lock execution')
    p.add_option('-u', action='store_true', dest='unlock', help='unlock execution')
    options, _ = p.parse_args()

    lock_path = '%s/%s' % (TEMP_DIR, LOCK_FILE)

    if not (options.lock or options.unlock):
        # Only a regular run is delayed; lock, unlock and --help act at once.
        # The lock file is still examined after the delay, so a lock set
        # while this run is waiting is honoured.
        time.sleep(60)

    if options.lock or (os.path.exists(lock_path) and not options.unlock):
        # Create (or touch) the lock file and release the handle right away.
        open(lock_path, 'w+').close()
        logger.info('Lock enabled')
        sys.exit(0)
    if options.unlock:
        try:
            os.remove(lock_path)
        except OSError:
            logger.info('Lock was disabled')
            sys.exit(1)
        logger.info('Lock disabled')
        sys.exit(0)

    for f in os.listdir(CONF_DIR):
        parser = ConfigParser.ConfigParser()
        check_config = "%s/%s" % (CONF_DIR, f)

        try:
            parser.read(check_config)
        except ConfigParser.MissingSectionHeaderError:
            logger.error('Unable to find sections in %s' % (check_config))
            continue
        except ConfigParser.ParsingError as e:
            # A broken file must not stop processing of the other files.
            logger.error('Unable to parse %s: %s' % (check_config, str(e)))
            continue

        # One check_config file might contain several checks
        for check in parser.sections():
            settings = _read_check_options(parser, check)
            if settings is None:
                continue
            (restart_command, restart_attempt, restart_window,
             restart_min_interval, execution_interval, restart_where_description) = settings

            if not is_status_error(check, restart_where_description):
                continue
            if attempts_status(check, restart_attempt, restart_window, restart_min_interval, execution_interval):
                restart_service(check, restart_command)

# Run the restarter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

