#!/usr/bin/env python2.7
import time
import os
import sys

# Put the directory two levels above this file (the parent of the directory
# that contains this script) at the front of sys.path, so it takes precedence
# over every entry that was already there.
# NOTE(review): presumably required for the `common.juggler` import below -
# confirm against the repository layout.
dir_path = '/'.join(os.path.abspath(__file__).split('/')[:-2])
sys.path = [dir_path] + sys.path

import yt.wrapper as yt
from nile.api.v1 import clusters, aggregators as na, extractors as ne
from nile.api.v1.datetime import Datetime, next_datetime
from qb2.api.v1 import extractors as se, filters as sf
from common.juggler import send_juggler, ok, crit


def find_stop_hook_status(yql_token, interval, log_path, status_out, full_out):
    """Build and run a Nile job that summarises push-client stop hook results.

    The job reads 5-minute log tables under ``log_path`` for a time range,
    keeps only records of the ``disk`` Qloud project in production-like
    environments whose message is one of the two stop hook outcome messages,
    and writes two tables:

    * ``full_out`` - the filtered log records with the extracted fields;
    * ``status_out`` - record counts per (message, application, project,
      component, environment) with a ``status`` column of 'OK' or 'FAIL'.

    :param yql_token: token passed to the Hahn cluster object.
    :param interval: offset passed to ``next_datetime`` with
        ``scale='hourly'`` to obtain the start of the range (the caller in
        this file passes a negative number of hours).
    :param log_path: base path of the source log tables; the time range
        template is appended to it.
    :param status_out: destination table for the aggregated counts.
    :param full_out: destination table for the filtered records.
    """
    failed_message = 'stop_hook: push-client lag check FAILED, restarting push-client'
    success_message = 'stop_hook: push-client lag is OK, shutting down supervisor'

    # Columns that identify a Qloud component; used both for grouping and
    # for the final report projection.
    component_keys = (
        'qloud_application',
        'qloud_project',
        'qloud_component',
        'qloud_environment',
    )
    extracted_fields = (
        'iso_eventtime',
        'qloud_project',
        'qloud_application',
        'qloud_component',
        'qloud_environment',
        'qloud_instance',
        'host',
        'message',
    )

    # Round "now" down to a 300 second (5 minute) boundary and derive the
    # start of the range from it.
    now_ts = time.time()
    range_end = Datetime.fromtimestamp(now_ts - now_ts % 300)
    range_start = next_datetime(range_end, offset=interval, scale='hourly')
    start_iso = range_start.isoformat()
    yt_range = '{{{}..{}..5:minutely}}'.format(start_iso, range_end.isoformat())

    cluster = clusters.Hahn(yql_token=yql_token)
    job = cluster.job().env(templates={
        'yt_range': yt_range,
        'title': 'DiskQloudStopHookMonitor for %s' % start_iso,
    })

    # The '@yt_range' placeholder refers to the template registered above.
    templated_path = os.path.join(log_path, '@yt_range')
    source = job.table(templated_path, ignore_missing=True)

    project_filter = sf.and_(
        sf.equals('qloud_project', 'disk'),
    )
    environment_filter = sf.or_(
        sf.equals('qloud_environment', 'production'),
        sf.equals('qloud_environment', 'corp-stable'),
        sf.startswith('qloud_environment', 'stable'),
    )
    message_filter = sf.or_(
        sf.equals('message', failed_message),
        sf.equals('message', success_message),
    )

    status_full = source.qb2(
        log='generic-yson-log',
        fields=[se.log_fields(*extracted_fields)],
        filters=[sf.and_(project_filter, environment_filter, message_filter)],
    )

    # Map the raw message text to a short status: the success message is the
    # only one containing 'lag is OK'.
    status_column = ne.custom(
        lambda text: 'OK' if 'lag is OK' in text else 'FAIL',
        'message',
    ).add_hints(type=str)

    grouping_keys = ('message',) + component_keys
    report_columns = component_keys + ('n',)
    counted = status_full.groupby(*grouping_keys).aggregate(n=na.count())
    status_count = counted.project(*report_columns, status=status_column)

    status_full.put(full_out)
    status_count.put(status_out)
    job.run()


if __name__ == '__main__':
    # Credentials are taken from the environment.
    yql_token = os.getenv('DISK_STAT_YQLTOKEN')
    yt.config['token'] = os.getenv('DISK_STAT_TOKEN')

    # os.getenv returns None for an unset variable; fail with an explicit
    # message instead of an opaque "NoneType + str" TypeError, and do it
    # before any job is started.
    job_root_env = os.getenv('JOB_ROOT')
    if job_root_env is None:
        raise RuntimeError('JOB_ROOT environment variable is not set')

    timerange = -24  # range in hours
    job_root = job_root_env + '/monitors/stop_hook/'
    qloud_logs_path = '//home/logfeller/logs/qloud-runtime-log/stream/5min/'
    juggler_event = 'stop_hook_status'
    status_table = job_root + 'stop_hook_status_count'
    full_table = job_root + 'stop_hook_status_full'

    # generate stop hook status report and put it into status_table + additional info in full_table
    find_stop_hook_status(yql_token, timerange, qloud_logs_path, status_table, full_table)

    yt.config.set_proxy('hahn.yt.yandex.net')

    # The alert text does not depend on the row, so build it once.
    failure_text = 'Some stop hook runs failed. Details in {}.'.format(full_table)

    for r in yt.read_table(status_table, format=yt.YsonFormat()):
        # Rows whose status contains 'OK' need no alert; anything else is
        # reported as critical for the corresponding component.
        if 'OK' in r['status']:
            continue

        # One Juggler host per project/application/environment/component.
        metahost = 'qloud-ext.{}.{}.{}.{}'.format(
            r['qloud_project'],
            r['qloud_application'],
            r['qloud_environment'],
            r['qloud_component'],
        )
        status, text = crit(failure_text)
        send_juggler(metahost, juggler_event, status, text)
