#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import argparse
import subprocess
import re
import yaml
import json
import datetime
import direct_juggler.juggler as dj
from kazoo.client import KazooClient

# JSON file that lists, among other things, the ZooKeeper hosts to connect to.
DB_CONFIG = '/etc/yandex-direct/db-config.json'
with open(DB_CONFIG, 'r') as fh:
    # Comma-separated host list built from db_config -> CHILDS -> zookeeper_ppcback -> host.
    ZK_HOSTS = ','.join(json.load(fh)['db_config']['CHILDS']['zookeeper_ppcback']['host'])

# Timeout passed to KazooClient; also used to derive the start() timeout in get_prod_version().
ZK_TIMEOUT = 10
# Retry settings, used for both connection_retry and command_retry below.
ZK_RETRY = {"max_tries": 1, "delay": 1, "max_jitter": 1, "backoff": 1, "ignore_expire": False}
# Shared ZooKeeper client; it is started and stopped inside get_prod_version().
zkh = KazooClient(hosts=ZK_HOSTS, timeout=ZK_TIMEOUT, connection_retry=ZK_RETRY, command_retry=ZK_RETRY)

# YAML file whose 'apps' section holds the ZooKeeper version-node path for java-jobs.
APPS_CONF_FILE = "/etc/yandex-direct/direct-apps.conf.yaml"
# Service name used in the events sent to juggler.
SERVICE_NAME = 'java_jobs_old_processes'

# Prefix of every juggler event description: identifies this script by file name.
DESCRIPTION_PREFIX = 'from: %s' % os.path.basename(__file__)
# Directory with one file per observed version; a file stores the time the
# version was first seen as non-production (empty for the production version).
STATS_PATH = '/var/log/yandex/jobs-time-alive-monitoring'

# How long a non-production version may keep running before it is reported as CRIT.
THRESHOLD = 12 * 60 * 60 # in seconds


def get_prod_version():
    """Return the production java-jobs version stored in ZooKeeper.

    The node path is read from APPS_CONF_FILE
    (``apps`` -> ``java-jobs`` -> ``zookeeper-version-node``). The first
    line of the node's data is returned; if the data contains no newline,
    the whole value is returned.

    Raises whatever the file read, YAML parsing or the ZooKeeper client
    raise; the ZooKeeper session is stopped even when reading the node fails.
    """
    # Read the local configuration first, so a broken config file does not
    # leave a started ZooKeeper session behind.
    with open(APPS_CONF_FILE, 'r') as fh:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects; consider yaml.safe_load if this file only
        # contains plain data.
        apps_conf = yaml.load(fh)['apps']
    node_path = apps_conf['java-jobs']['zookeeper-version-node']

    # NOTE(review): operator precedence makes this
    # (ZK_TIMEOUT * number_of_commas) + 1, i.e. 1 second for a single host;
    # confirm whether ZK_TIMEOUT * (number_of_commas + 1) was intended.
    zkh.start(timeout=ZK_TIMEOUT * ZK_HOSTS.count(',') + 1)
    try:
        version_node = zkh.get(node_path)[0]
    finally:
        zkh.stop()

    # split() keeps the full value when there is no newline, whereas slicing
    # by find('\n') == -1 would silently drop the last character.
    return version_node.split('\n', 1)[0]


def parse_options():
    """Parse the command line and return the resulting argparse namespace.

    The only option is ``-j`` / ``--juggler`` (stored as ``juggler``,
    False by default).
    """
    arg_parser = argparse.ArgumentParser(
        description='Наблюдает за запущенными процессами java-jobs и сигнализирует о старых в juggler',
        epilog="Пример использования:\n\t %(prog)s --juggler",
        # Raw formatter keeps the newline and tab of the epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    arg_parser.add_argument(
        '-j', '--juggler',
        dest="juggler",
        action='store_true',
        help="отправить рузультат в juggler и ничего не выводить",
    )

    parsed = arg_parser.parse_args()
    return parsed


def run():
    """Check running java-jobs processes and flag old versions that run too long.

    Steps:
      * get the production version via get_prod_version();
      * extract the versions of running JobsApp processes from ``ps`` output;
      * for every non-production version, record in STATS_PATH/<version> the
        time it was first seen as old, and mark it critical once that time
        is more than THRESHOLD seconds in the past;
      * remove STATS_PATH files of versions that are no longer running;
      * with ``--juggler`` send one OK/CRIT event to juggler, otherwise print
        a per-version report.

    Any exception is re-raised; with ``--juggler`` a CRIT event describing
    the exception is queued first.
    """
    # Parse arguments before the guarded section: the exception handler
    # below reads ``opts``, so it must already be bound when it runs.
    opts = parse_options()

    try:
        prod_version = get_prod_version()

        ps_output = subprocess.check_output(['ps', '-eo', 'cmd'])
        # Several processes may run the same version; each version is
        # checked and reported once.
        cur_versions = set(re.findall(
            r'java.+ru\.yandex\.direct\.jobs\.JobsApp.+version.+?([0-9-.]+)', ps_output, re.M))
        is_crit = False
        dt_now = datetime.datetime.now()

        for version in sorted(cur_versions):
            cur_is_crit = False
            path = os.path.join(STATS_PATH, version)
            start_time = ''

            if os.path.isfile(path):
                with open(path, 'r') as fh:
                    start_time = fh.read().strip()

            if version == prod_version:
                # The production version is never "old": keep an empty stats file.
                open(path, 'w').close()
                if not opts.juggler:
                    print('version: %s - prod version\nOK' % version)
            else:
                if not start_time:
                    # First time this version is seen as old: remember when.
                    start_time = dt_now.strftime('%Y-%m-%d %H:%M:%S')
                    with open(path, 'w') as fh:
                        fh.write(start_time)
                else:
                    start_time_dt = datetime.datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
                    cur_is_crit = (dt_now - start_time_dt).total_seconds() > THRESHOLD

                if not opts.juggler:
                    if cur_is_crit:
                        verdict = ('CRIT: old java-jobs version works too long (> %s)'
                                   % str(datetime.timedelta(seconds=THRESHOLD)))
                    else:
                        verdict = 'OK'
                    print('version: %s - old version\ntime when this version became old: %s\n%s' % (
                        version,
                        start_time,
                        verdict,
                    ))

            if not opts.juggler:
                print("-" * 40)
            is_crit |= cur_is_crit

        # Drop stats files of versions that are no longer running.
        for stats_file_version in os.listdir(STATS_PATH):
            if stats_file_version not in cur_versions:
                os.remove(os.path.join(STATS_PATH, stats_file_version))

        if opts.juggler:
            dj.queue_events([{
                'service': SERVICE_NAME,
                'status': 'CRIT' if is_crit else 'OK',
                'description': "; ".join([DESCRIPTION_PREFIX, 'run this script for details' if is_crit else '']),
            }])

    except Exception as e:
        # Report the failure to juggler (when requested), then let it propagate.
        if opts.juggler:
            dj.queue_events([{
                'service': SERVICE_NAME,
                'status': 'CRIT',
                'description': "; ".join([DESCRIPTION_PREFIX, 'run this script for details', 'unexpected exception: %s %s' % (type(e), e)]),
            }])
        raise


if __name__ == '__main__':
    run()
