# -*- coding: utf-8 -*-
import collections
import re
import socket
import subprocess
import yt.wrapper as yt


def execute_shell(cmd):
    """Run *cmd* through the shell and return its stdout with surrounding whitespace stripped.

    Only stdout is piped and captured; stderr is left attached to the parent
    process and the command's exit status is not inspected.
    """
    child = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    captured_stdout = child.communicate()[0]
    return captured_stdout.strip()


def get_active_pids():
    """Return the pids of the running queue2 worker processes as a list of strings.

    The pids are read from ``ps auxf`` output: only lines that start with
    ``nginx`` (the process owner), mention ``queue2`` and carry the forest-view
    child marker are kept, and the second column (the pid) is extracted.

    A real list is always returned (never a lazy ``filter`` object), so the
    result can be scanned repeatedly with ``in`` on Python 2 and Python 3 alike.
    """
    output = execute_shell("ps auxf | grep '^nginx' | grep queue2 | grep '\\_ ' | awk '{print $2}'")
    if not isinstance(output, str):
        # On Python 3 the pipe yields bytes; decode so that pids are text.
        # On Python 2 the output is already ``str`` and this branch is skipped.
        output = output.decode('utf-8')
    # An empty ps result produces a single empty chunk; drop empty chunks.
    return [pid for pid in output.split('\n') if pid]


# A request id is expected to be five dash-terminated hex groups followed by
# the task name; the name is what the per-type counters are keyed on.
TASK_NAME_RE = re.compile(r'([0-9a-f]+-){5}(?P<name>.*)')

# Counter key used for tasks whose request id does not match TASK_NAME_RE.
UNKNOWN_TASK_TYPE = 'unknown'


def select_hang_tasks(tasks, hostname, active_pids):
    """Pick the tasks of *hostname* whose pid is still alive, one per request id.

    :param tasks: iterable of dicts with ``host``, ``pid`` and ``request_id`` keys.
    :param hostname: only tasks whose ``host`` equals this value are considered.
    :param active_pids: iterable of pids of the currently running workers.
    :return: list of matching tasks in input order; for a repeated
        ``request_id`` only the first occurrence is kept.
    """
    # Pids coming from ``ps`` are strings while the table row may carry an
    # integer, so both sides are compared as strings. A set makes each
    # membership test O(1).
    alive = set(str(pid) for pid in active_pids)
    seen_request_ids = set()
    selected = []
    for task in tasks:
        if task['host'] != hostname:
            continue
        if str(task['pid']) not in alive:
            continue
        request_id = task['request_id']
        if request_id in seen_request_ids:
            continue
        seen_request_ids.add(request_id)
        selected.append(task)
    return selected


def count_tasks_by_type(tasks):
    """Count *tasks* per task name extracted from their ``request_id``.

    A request id that does not match ``TASK_NAME_RE`` is counted under
    ``UNKNOWN_TASK_TYPE`` instead of aborting the whole run.

    :return: plain dict mapping task name to the number of tasks.
    """
    counts = collections.defaultdict(int)
    for task in tasks:
        matched = TASK_NAME_RE.match(task['request_id'])
        name = matched.group('name') if matched else UNKNOWN_TASK_TYPE
        counts[name] += 1
    return dict(counts)


if __name__ == '__main__':
    with open('/etc/yandex/disk-stat-token', 'r') as f:
        yttoken = f.read().strip()

    yt.config['token'] = yttoken
    # NOTE(review): `settings` is not imported anywhere in this file, so this
    # line raises NameError unless the name is injected by the environment --
    # confirm which project module is supposed to provide it and import it.
    yt.config['proxy']['url'] = settings.mrstat['yt_proxy']

    hang_tasks_from_yt = list(yt.read_table('//tmp/mpfs_hang_tasks', format=yt.JsonFormat()))

    hang_tasks = select_hang_tasks(hang_tasks_from_yt, socket.gethostname(), get_active_pids())
    hang_tasks_qty_by_type = count_tasks_by_type(hang_tasks)

    # One line per task type (sorted for a stable output order), then the total.
    # The parenthesised single-argument print behaves the same on Python 2 and 3.
    for type_, qty in sorted(hang_tasks_qty_by_type.items()):
        print('hang_task_count_%s %d' % (type_, qty))
    print('hang_task_count_Total %d' % sum(hang_tasks_qty_by_type.values()))
