#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import glob
import time

def die(code=0, msg='ok'):
    """Print the check result as ``<code>;<msg>`` and terminate the process.

    code -- numeric status placed before the semicolon.
    msg  -- description placed after the semicolon; any falsy value
            (empty string, None) is reported as 'ok'.

    The process always exits with status 0, whatever ``code`` is.
    NOTE(review): presumably the consumer reads the severity from the
    printed line rather than from the exit status -- confirm before
    changing this.
    """
    if not msg:
        msg = 'ok'
    # sys.stdout.write instead of the print statement: same output, but the
    # file also parses when `python` resolves to Python 3.
    sys.stdout.write('{c};{m}\n'.format(c=code, m=msg))
    sys.exit(0)

def _format_launch_time(s):
    """Render the ``launched_ts`` entry of state dict *s* as local time.

    A missing entry is treated as 0.0 (the epoch). If the stored value cannot
    be converted to a timestamp, 'unknown' is returned so that a malformed
    timestamp does not hide the rest of the status message.
    """
    try:
        launched = time.localtime(float(s.get('launched_ts', 0.0)))
    except (TypeError, ValueError, OverflowError, OSError):
        return 'unknown'
    # Only standard directives: width flags such as '%02m' are a libc
    # extension and are not accepted on every platform.
    return time.strftime('%Y-%m-%d.%H:%M:%S', launched)


def check_reporter(state):
    """Inspect one reporter state object and return ``(code, msg)``.

    state -- object providing ``islocked()`` and ``read()``; ``read()`` must
             return a dict-like object (keys used: 'status', 'name',
             'stage', 'error', 'launched_ts').

    Returned codes:
      0 -- status is 'finished' (or any value not handled below); msg is ''.
      1 -- status is 'running' and the state is locked.
      2 -- status is 'error' (also assumed when 'status' is missing), or
           status is 'running' but the state is not locked (stale run).
    """
    is_running = state.islocked()
    s = state.read()
    status = s.get('status', 'error')

    if status == 'error':
        # Keep only the first line of the error and drop ';' from it.
        # A missing, None or empty error falls back to a generic text
        # instead of raising IndexError or printing "None".
        raw_error = s.get('error')
        desc = ''
        if raw_error:
            desc = raw_error.splitlines()[0].replace(';', '')
        msg = '{name} failed at {stage}: {desc}'.format(
            name=s.get('name', 'unknown'),
            stage=s.get('stage', 'unknown stage'),
            desc=desc or 'unknown error',
        )
        # Status says error, yet the state is still locked.
        if is_running:
            msg += ' proc is still alive, last upd at {time}'.format(
                time=_format_launch_time(s),
            )
        return (2, msg)

    if status == 'running':
        if is_running:
            msg = '{name} is in {stage} since {time}'.format(
                name=s.get('name', 'unnamed'),
                stage=s.get('stage', 'unknown'),
                time=_format_launch_time(s),
            )
            return (1, msg)
        # Status says running but nothing holds the lock: the lock is stale.
        msg = '{name} is dead, stage {stage}, last upd at {time}'.format(
            name=s.get('name', 'unnamed'),
            stage=s.get('stage', 'unknown'),
            time=_format_launch_time(s),
        )
        return (2, msg)

    # 'finished' and any unrecognised status are reported as OK.
    return (0, '')



# Step 4 (status == 'running': report the name and launch time) is handled
# by the 'running' branch of check_reporter() above.
    

# The reporter package is optional: without it there is nothing to check,
# so report OK instead of failing the check.
try:
    from yt_reporter.lock.file import StateFile,StateException,StateLockedException
except ImportError:
    die(0, 'reporter not installed')

# Seed with an OK entry so the maximum below is defined even when no state
# files are found.
report = [(0, '')]
for statefile in glob.glob('/tmp/__reporter_state_*'):
    try:
        state = StateFile(statefile, lock=False)
        # check_reporter() returns tuple(code, description)
        report.append(check_reporter(state))
    except Exception as e:
        # Any failure to open or inspect a state file is critical. str()
        # works on both Python 2 and 3 (unicode() is undefined on 3); fall
        # back to the exception class name so that a critical code is never
        # paired with an empty description (which die() would print as 'ok').
        report.append((2, str(e) or e.__class__.__name__))

# Report only problematic statuses (code > 0), ordered by code then text.
# If everything is OK the message is an empty string.
message = ', '.join(msg for code, msg in sorted(report) if code > 0)
# The overall status is the worst (highest) individual code.
code = max(code for code, _ in report)

die(code, message)
