#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Provides: cpu_throt_check
# Provides: cpu_throt_capping_check
# Provides: cpu_throt_frequency_check
# Provides: cpu_throt_thermal_check
# Provides: cpu_throt_turboboost_check
# Provides: cpu_throt_perf_check
#
# Simple check for cpu throttling

from os.path import exists, join

# Final result for every passive check this script provides.
# Each entry starts as OK (status 0, empty message) and is filled in
# right before the results are reported.
services = {
    check_name: {'status': 0, "msg": ""}
    for check_name in (
        'cpu_throt_check',
        'cpu_throt_capping_check',
        'cpu_throt_frequency_check',
        'cpu_throt_thermal_check',
        'cpu_throt_turboboost_check',
        'cpu_throt_perf_check',
    )
}

# Findings gathered by the detectors: signal name -> {message: status}.
# The 'all' bucket receives a copy of every finding.
collector = {
    signal_name: {}
    for signal_name in ('thermal', 'capping', 'all', 'perf', 'turboboost', 'frequency')
}

def report(service, status, msg):
    """Print one passive-check result line to stdout.

    The line has the form ``PASSIVE-CHECK:<service>;<status>;<msg>``;
    trailing whitespace of the whole line is stripped before printing.
    """
    line = "PASSIVE-CHECK:{};{};{}".format(service, status, str(msg))
    print(line.rstrip())


def status_collector(signal, message, status):
    """Record a finding under its own signal and under the 'all' summary.

    A message that is already stored with a truthy status is left
    untouched, so the first status recorded for a message wins.
    """
    for bucket_name in (signal, 'all'):
        bucket = collector[bucket_name]
        if not bucket.get(message):
            bucket[message] = status


def read_content(file):
    """Return the complete text content of the file at path *file*."""
    with open(file, mode="rt") as handle:
        text = handle.read()
    return text


def get_cpu_info(fields=('model name', 'cpu MHz'), path='/proc/cpuinfo'):
    """Parse per-processor entries from a /proc/cpuinfo style file.

    Every line whose key is ``processor`` starts a new entry; the following
    ``key : value`` lines whose key is listed in *fields* are stored in that
    entry with surrounding whitespace stripped.

    :param fields: keys to keep for every processor entry
    :param path: file to parse (defaults to /proc/cpuinfo)
    :return: list of dicts, one per ``processor`` line, in file order
    """
    cpu_info = []
    with open(path) as f:
        for line in f:
            # Split on the first colon only, so a value that itself
            # contains ':' is kept instead of the line being dropped.
            parts = line.split(':', 1)
            if len(parts) != 2:
                continue  # blank separator lines and other non "key: value" lines

            key = parts[0].strip()
            if key == 'processor':
                cpu_info.append({})
            elif key in fields and cpu_info:
                # A field seen before the first 'processor' line has no
                # entry to belong to and is ignored.
                cpu_info[-1][key] = parts[1].strip()

    return cpu_info


def offline_cores_detect(cpu_table, cpu_inf, cpu_sys_path):
    """Report the content of ``<cpu_sys_path>/offline`` when it is not empty.

    Nothing is checked unless *cpu_inf* is a key of *cpu_table*.
    The finding is recorded with status 2 in the 'all' bucket only.
    """
    if cpu_inf not in cpu_table:
        return

    offline = read_content(cpu_sys_path + '/offline').strip()
    if offline:
        status_collector("all", "offline cores: " + offline, 2)


def capping_detect(cpu_table, cpu_inf, cpu_sys_path):
    """Detect cores whose ``cpufreq/bios_limit`` is below the expected value.

    Nothing is checked unless *cpu_inf* is a key of *cpu_table*. For every
    core index below the table's "CORES" value, the core's bios_limit file
    is compared with the table's "BIOS" value. A missing file is recorded
    with status 1; any capped core results in a single status 2 finding
    that carries the number of capped cores and the lowest limit seen.
    """
    if cpu_inf not in cpu_table:
        return

    spec = cpu_table[cpu_inf]
    expected_limit = spec["BIOS"]
    lowest_limit = expected_limit
    capped_count = 0

    for core_index in range(spec["CORES"]):
        limit_file = join(cpu_sys_path, "cpu" + str(core_index), "cpufreq", "bios_limit")
        if not exists(limit_file):
            status_collector("capping", "bios_limit section not detected", 1)
            continue

        core_limit = int(read_content(limit_file))
        if core_limit < expected_limit:
            lowest_limit = min(core_limit, lowest_limit)
            capped_count += 1

    if capped_count:
        # bios_limit values are divided by 1,000,000 to be shown as GHz
        text = "Capping detected on " + str(capped_count) + " CPUs, min_freq: " + str(
            lowest_limit / 1000000) + " GHz"
        status_collector("capping", text.upper(), 2)


def frequency_check(cpu_info):
    """Report status 2 when every core runs at its minimal frequency.

    The minimal frequency is read from cpu0's ``cpuinfo_min_freq`` and
    compared with the 'cpu MHz' value of each entry in *cpu_info*.
    The check is skipped when the minimal frequency cannot be read or when
    no entry carries a 'cpu MHz' value.

    :param cpu_info: list of per-processor dicts as returned by get_cpu_info()
    """
    try:
        min_freq = int(read_content('/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq'))
    except (OSError, ValueError):
        # some platforms have no such file (or no usable value in it);
        # no file - no check
        return

    min_freq += 3000  # core sensors may show a tiny bit higher freq - neutralize such noise
    min_freq /= 1000  # convert to MHz

    # Entries without a 'cpu MHz' field cannot be judged, so leave them out.
    readings = [core['cpu MHz'] for core in cpu_info if 'cpu MHz' in core]
    if not readings:
        # Nothing to compare; without this guard "0 of 0 cores" would
        # count as "all cores at minimal frequency".
        return

    min_freq_cores = 0
    for reading in readings:
        if float(reading) <= min_freq:
            min_freq_cores += 1

    if min_freq_cores == len(readings):
        status_collector("frequency", "All CPU cores at their minimal frequency", 2)


def turboboost_check(cpu_table, cpu_inf, cpu_sys_path):
    """Check the intel_pstate ``no_turbo`` and ``max_perf_pct`` settings.

    Nothing is checked unless *cpu_inf* is a key of *cpu_table*.

    * ``no_turbo`` present and non-zero -> "turboboost" finding, status 2
    * ``no_turbo`` missing              -> "turboboost" finding, status 1
    * ``max_perf_pct`` present and not 100 -> "perf" finding, status 2
    """
    if cpu_inf not in cpu_table:
        return

    pstate_dir = join(cpu_sys_path, "intel_pstate")

    boost_path = join(pstate_dir, "no_turbo")
    if exists(boost_path):
        if int(read_content(boost_path)) != 0:
            status_collector("turboboost", "TurboBoost disabled".upper(), 2)
    else:
        # The "not supported" warning belongs to the turbo switch itself,
        # not to the presence of max_perf_pct.
        status_collector("turboboost", "TurboBoost not supported", 1)

    max_perf_pct_path = join(pstate_dir, "max_perf_pct")
    if exists(max_perf_pct_path):
        if int(read_content(max_perf_pct_path)) != 100:
            status_collector("perf", "max_perf_pct less than 100".upper(), 2)


def temp_check(cpu_table, cpu_inf, virtual_sys_path):
    """Detect thermal zones whose temperature reached the table's limit.

    Nothing is checked unless *cpu_inf* is a key of *cpu_table*. For every
    index below the table's "SOCKET" value the file
    ``<virtual_sys_path>/thermal/thermal_zone<index>/temp`` is compared with
    the table's "TEMP" value. A missing file is recorded with status 1; any
    zone at or above the limit results in a single status 2 finding.

    NOTE(review): this assumes thermal_zone<N> corresponds to socket N -
    confirm against the zone's ``type`` file on the target hardware.
    """
    if cpu_inf not in cpu_table:
        return

    spec = cpu_table[cpu_inf]
    temp = spec["TEMP"]
    max_temp = temp
    affected_packs = 0

    for pack in range(spec["SOCKET"]):
        temp_path = join(virtual_sys_path, "thermal", "thermal_zone" + str(pack), "temp")
        if not exists(temp_path):
            status_collector("thermal", "Thermal section not detected", 1)
            continue

        cur_temp = int(read_content(temp_path))
        if cur_temp >= temp:
            affected_packs += 1
            max_temp = max(cur_temp, max_temp)

    if affected_packs:
        # Raw values are divided by 1000 for display (presumably
        # millidegrees -> degrees; confirm against the kernel docs).
        message = "Critical thermal status on {} sockets, max_temp: {}; good_temp: less than {}".format(
            affected_packs, max_temp / 1000, temp / 1000)
        status_collector("thermal", message.upper(), 2)


# Without /proc/cpuinfo nothing can be checked: flag every service with
# status 1 and finish with exit code 0.
if not exists("/proc/cpuinfo"):
    for service_name in services:
        report(service_name, 1, 'Unsupported platform')
    raise SystemExit(0)

# Expected values per processor model name (as shown in /proc/cpuinfo).
# Columns: BIOS limit, TEMP limit, number of CORES, number of SOCKETs.
supported_processors = {
    model: dict(zip(("BIOS", "TEMP", "CORES", "SOCKET"), values))
    for model, values in (
        # These processors have neither /sys/devices/system/cpu/cpu*/cpufreq/bios_limit
        # nor /sys/devices/virtual/thermal/thermal_zone*/temp:
        # ("E5620", (2201000, 85000, 16, 2)),
        # ("E5645", (2201000, 85000, 24, 2)),

        # These processors have no /sys/devices/virtual/thermal/thermal_zone*/temp:
        # ("X5670", (2934000, 85000, 24, 2)),
        # ("X5675", (2201000, 85000, 24, 2)),

        ("Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz", (2600000, 84000, 32, 2)),
        ("Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz", (2201000, 85000, 32, 2)),
        ("Intel(R) Xeon(R) CPU E5-2667 0 @ 2.90GHz", (2901000, 85000, 24, 2)),
        ("Intel(R) Xeon(R) CPU E5-2667 v2 @ 3.30GHz", (3301000, 85000, 32, 2)),
        ("Intel(R) Xeon(R) CPU E5-2660 v4 @ 2.00GHz", (2001000, 85000, 28, 2)),
    )
}

cpu_info = get_cpu_info()

# The detectors are selected by model name. When /proc/cpuinfo yields no
# processor entry or no 'model name' field, report the platform as
# unsupported (same as for a missing /proc/cpuinfo) instead of crashing.
processor_name = cpu_info[0].get('model name') if cpu_info else None
if processor_name is None:
    for service in services:
        report(service, 1, 'Unsupported platform')
    raise SystemExit(0)

root_sys_cpu = "/sys/devices/system/cpu"
root_sys_virtual = "/sys/devices/virtual"

# Running detectors
offline_cores_detect(supported_processors, processor_name, root_sys_cpu)
capping_detect(supported_processors, processor_name, root_sys_cpu)
turboboost_check(supported_processors, processor_name, root_sys_cpu)
temp_check(supported_processors, processor_name, root_sys_virtual)
frequency_check(cpu_info)

# Fold the findings of every signal into its service entry: the worst
# status wins and all messages are joined; no findings means OK.
for signal, findings in collector.items():
    service_name = 'cpu_throt_check' if signal == 'all' else 'cpu_throt_{}_check'.format(signal)
    if findings:
        worst_status = max(findings.values())
        summary = "; ".join(findings)
    else:
        worst_status = 0
        summary = "OK"

    entry = services[service_name]
    entry['status'] = worst_status
    entry['msg'] = summary

# Emit one result line per service.
for service_name, entry in services.items():
    report(service_name, entry['status'], entry['msg'])
