#!/skynet/python/bin/python

import datetime
import collections
import sys

import pymongo

import utils

from utils import Environment


# Check name passed to utils.say() when the status is reported.
SERVICE_NAME = "sandbox_service_threads_check"
# Address of the MongoDB instance that main() connects to.
HOST = "localhost"
PORT = 22222
# Processes whose delays alone do not make an event urgent: when only these are
# delayed, CRIT is reported only if utils.DailyAlertSpan contains the current time.
DELAY_TOLERANT_PROCESSES = ("CleanResources",)
# Database used when the environment is Environment.PRODUCTION.
DB_NAME = "sandbox"
# Database used for any other environment.
DB_NAME_PRE_PRODUCTION = "sandbox_restored"


# One delayed process as it appears in the report; `delay` and `timeout` are in minutes.
Delay = collections.namedtuple("Delay", ["process", "comment", "host", "delay", "timeout"])


def calculate_delay(time_obj, timeout, utcnow):
    """Return how many whole minutes a schedule record is overdue.

    :param time_obj: mapping that may hold ``last_run`` and ``next_run`` timestamps.
    :param timeout: allowed lag in minutes.
    :param utcnow: the reference "now" timestamp the record is compared against.
    :return: minutes elapsed since the later of the two timestamps when that
        timestamp is more than ``timeout`` minutes behind ``utcnow``; ``0`` when
        the record is within the allowed lag or either timestamp is missing.
    """
    deadline = utcnow - datetime.timedelta(minutes=timeout)

    last_run = time_obj.get("last_run")
    next_run = time_obj.get("next_run")
    # A record without both timestamps cannot be judged, so it is never reported.
    if last_run is None or next_run is None:
        return 0

    latest = max(last_run, next_run)
    if latest >= deadline:
        return 0

    overdue = utcnow - latest
    return int(overdue.total_seconds() / 60)


def format_time_object(time_obj):
    """Render a mapping as ``(key=value, key=value, ...)`` in its iteration order."""
    pairs = ["{}={}".format(key, value) for key, value in time_obj.items()]
    joined = ", ".join(pairs)
    return "({})".format(joined)


def format_timedelta(minutes):
    """Render a duration given in minutes as ``<H>h<M>m``, or ``<M>m`` when under an hour.

    :param minutes: duration in whole minutes.
    :return: e.g. ``"1h30m"`` for 90 and ``"30m"`` for 30.
    """
    # divmod floors the quotient on every Python version; plain ``/`` is true
    # division on Python 3 and would yield fractional hours such as "1.5h30m".
    hours, minutes = divmod(minutes, 60)
    return "{}h{}m".format(hours, minutes) if hours else "{}m".format(minutes)


def main():
    """Scan the ``service`` collection for delayed processes/threads and report the status.

    The environment is taken from the first command-line argument (looked up by
    name in ``Environment``) and defaults to ``Environment.PRODUCTION``; it
    selects which database is read.

    For every record the process-level ``time`` is checked first; only when the
    process itself is not delayed are its ``threads`` checked, and all delayed
    threads are folded into a single entry carrying the largest delay.

    CRIT is reported when at least one delayed process is not listed in
    ``DELAY_TOLERANT_PROCESSES``, or when ``utils.DailyAlertSpan`` contains the
    current local time; otherwise OK is reported.
    """
    environment = Environment[sys.argv[1]] if len(sys.argv) > 1 else Environment.PRODUCTION
    db_name = DB_NAME if environment is Environment.PRODUCTION else DB_NAME_PRE_PRODUCTION

    client = pymongo.MongoClient(HOST, PORT)
    db = client[db_name]

    utcnow = datetime.datetime.utcnow()
    result = []

    for rec in db.service.find({}, ["_id", "time", "timeout", "host", "threads"]):
        # Records without an explicit timeout get 30 (minutes, as interpreted by calculate_delay).
        timeout = rec.get("timeout", 30)
        main_delay = calculate_delay(rec.get("time", {}), timeout, utcnow)
        if main_delay:
            result.append(Delay(
                process=rec["_id"],
                comment="",
                host=rec["host"],
                delay=main_delay,
                timeout=timeout,
            ))
            continue

        # The process itself is on schedule: look for individual delayed threads.
        # A list of pairs keeps the names in the order the threads were iterated.
        hung = []
        for name, record in rec.get("threads", {}).items():
            thread_delay = calculate_delay(record, timeout, utcnow)
            if thread_delay:
                hung.append((name, thread_delay))

        if hung:
            result.append(Delay(
                process=rec["_id"],
                comment="hung threads: {}".format("/".join(name for name, _ in hung)),
                host=rec["host"],
                delay=max(thread_delay for _, thread_delay in hung),
                timeout=timeout,
            ))

    if result:
        threads = ", ".join(
            "{}{} @ {} (delay: {}, threshold: {})".format(
                d.process,
                " ({})".format(d.comment) if d.comment else "",
                d.host,
                format_timedelta(d.delay),
                format_timedelta(d.timeout),
            )
            for d in result
        )

        urgent_event = any(d.process not in DELAY_TOLERANT_PROCESSES for d in result)
        if urgent_event or utils.DailyAlertSpan.contains(datetime.datetime.now()):
            # NOTE(review): die=True presumably terminates the process so the OK
            # report below is not reached -- confirm in utils.say.
            utils.say(SERVICE_NAME, utils.STATUS_CRIT, "Frozen processes: {}".format(threads), die=True)

    utils.say(SERVICE_NAME, utils.STATUS_OK, "OK")


# Run the check only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
