#!/usr/bin/env python2.7
import time
import os
import sys

# Absolute path of the parent of the directory that contains this script,
# obtained by dropping the last two '/'-separated components (the file name
# and its directory) from the script's absolute path.
dir_path = '/'.join(os.path.abspath(__file__).split('/')[:-2])
# Put that directory first on the module search path. Presumably this is what
# makes the `common` package imported below resolvable -- confirm against the
# repository layout.
sys.path = [dir_path] + sys.path

import yt.wrapper as yt
from nile.api.v1 import clusters, aggregators as na
from nile.api.v1.datetime import Datetime, next_datetime
from qb2.api.v1 import extractors as se, filters as sf
from common.juggler import send_juggler, ok, crit


def read_yt_table(token, yt_path):
    """Read all rows of a YT table into a list.

    Before reading, the module-level ``yt.config`` is updated: the token is
    stored under ``'token'`` and the proxy is set to ``hahn.yt.yandex.net``.
    These settings are not restored afterwards.

    :param token: value assigned to ``yt.config['token']``.
    :param yt_path: table path handed to ``yt.read_table``.
    :return: list with every row yielded by ``yt.read_table`` (YSON format).
    """
    yt.config['token'] = token
    yt.config.set_proxy('hahn.yt.yandex.net')
    rows = yt.read_table(yt_path, format=yt.YsonFormat())
    # Materialize the row iterator so the caller gets a plain list.
    return list(rows)


def prepare_oom_count(token, interval, log_path, count_out):
    """Run a Nile job on Hahn that counts OOM-killer log records per component.

    The job reads the tables under ``log_path`` selected by a time-range
    template, keeps records of the ``disk`` project in the ``production``,
    ``corp-stable`` or ``stable*`` environments whose message contains the
    OOM-killer text, groups them by application / project / component /
    environment and writes the per-group count (column ``n``) to ``count_out``.

    :param token: token passed to ``clusters.Hahn``.
    :param interval: offset given to ``next_datetime`` with ``scale='hourly'``
        to compute the start of the range from the current 5-minute boundary
        (presumably negative, i.e. a look-back in hours -- confirm with caller).
    :param log_path: base path of the log tables; ``@yt_range`` is appended.
    :param count_out: path of the output table.
    """
    oom_message = "(signal was 'killed'), it was killed by OOM killer, will be respawned"

    # Round the current time down to a multiple of 300 seconds (5 minutes).
    end_ts = time.time()
    end_ts -= end_ts % 300
    range_end = Datetime.fromtimestamp(end_ts)
    range_start = next_datetime(range_end, offset=interval, scale='hourly')
    yt_range = '{{{}..{}..5:minutely}}'.format(range_start.isoformat(), range_end.isoformat())

    job = clusters.Hahn(token=token).job().env(templates={'yt_range': yt_range})

    # The '@yt_range' placeholder in the path is filled from the job template above.
    source = job.table(os.path.join(log_path, '@yt_range'), ignore_missing=True)

    extracted_fields = se.log_fields(
        'qloud_project',
        'qloud_application',
        'qloud_component',
        'qloud_environment',
        'qloud_instance',
        'host',
        'message',
        'iso_eventtime'
    )
    project_filter = sf.and_(
        sf.equals('qloud_project', 'disk'),
    )
    environment_filter = sf.or_(
        sf.equals('qloud_environment', 'production'),
        sf.equals('qloud_environment', 'corp-stable'),
        sf.startswith('qloud_environment', 'stable'),
    )
    message_filter = sf.contains('message', oom_message)

    oom_records = source.qb2(
        log='generic-yson-log',
        fields=[extracted_fields],
        filters=[sf.and_(project_filter, environment_filter, message_filter)],
    )

    group_keys = ('qloud_application', 'qloud_project', 'qloud_component', 'qloud_environment')
    per_group = oom_records.groupby(*group_keys).aggregate(n=na.count())
    output_columns = group_keys + ('n',)
    per_group.project(*output_columns).put(count_out)

    job.run()


def require_env(name):
    """Return the value of environment variable ``name``.

    Exits the script (via ``sys.exit``) with a message naming the variable
    when it is not set, instead of letting a later ``int(None)`` or
    ``None + str`` fail with an opaque ``TypeError``.

    :param name: environment variable name.
    :return: the variable's string value (may be empty).
    """
    value = os.getenv(name)
    if value is None:
        sys.exit('Required environment variable {} is not set'.format(name))
    return value


if __name__ == '__main__':
    # Script entry point: build the OOM count table, read it back and send one
    # Juggler event per (project, application, environment, component) row.

    # The token is left optional: the original code also passes it through
    # unchanged when the variable is unset.
    yt_token = os.getenv('DISK_STAT_TOKEN')
    timerange = int(require_env('TIMERANGE'))  # range in hours
    crit_level = int(require_env('CRIT'))
    job_root = require_env('JOB_ROOT') + '/monitors/qloud_oom_count/'
    qloud_logs_path = '//home/logfeller/logs/qloud-runtime-log/stream/5min/'
    juggler_event = 'qloud_oom_count'
    qloud_oom_count_table = job_root + 'qloud_oom_count'

    prepare_oom_count(yt_token, timerange, qloud_logs_path, qloud_oom_count_table)
    results = read_yt_table(yt_token, qloud_oom_count_table)

    # The sign of TIMERANGE is dropped for display only.
    hours = abs(timerange)

    for r in results:
        metahost = 'qloud-ext.{}.{}.{}.{}'.format(
            r['qloud_project'],
            r['qloud_application'],
            r['qloud_environment'],
            r['qloud_component'],
        )
        oom_cnt = r['n']
        message = 'OOM count: {} in last {} hours.'.format(oom_cnt, hours)
        # Both crit() and ok() take the message and return a (status, text) pair.
        make_status = crit if oom_cnt >= crit_level else ok
        status, text = make_status(message)

        send_juggler(metahost, juggler_event, status, text)
