#!/usr/bin/python
# -*- coding: utf8 -*-
#
# This script produces service timing graphs from data stored in ClickHouse (the data gets there from the servers via mtrs-logpusher).
# When adding a new balancer, add the corresponding showTimings entry in __main__, by analogy with the existing ones.

import urllib
import urllib2
import json
import time
import os

# ClickHouse HTTP endpoint; the %s placeholder receives the URL-quoted query text.
chUrl = 'http://localhost:8123/?query=%s'

# Lag, in seconds, applied both to the queried time window and to the
# timestamp attached to every emitted metric.
delay = 120

# WHERE-clause fragment shared by all queries: restricts rows to today and to
# the one-minute window that ended `delay` seconds ago, with static = 0.
commonLimits = '''day = today() and
                date >= toStartOfMinute(toDateTime(now() - 60 - %(delay)d )) and
                date < toStartOfMinute(toDateTime(now() - %(delay)d )) and
                static = 0
''' % dict(delay=delay)

def log_debug(msg):
    """Print a single-line debug message when the DEBUG env variable is set.

    Newlines in ``msg`` are replaced with spaces and carriage returns are
    dropped, so a multi-line query is logged as one line. Nothing is printed
    when DEBUG is unset or empty.
    """
    if os.environ.get('DEBUG'):
        # Single-argument parenthesised form: prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print('DEBUG: ' + msg.replace('\n', ' ').replace('\r', ''))

def getJsonFromCh(query):
    """Run ``query`` against ClickHouse over HTTP and return the parsed JSON.

    The query text is URL-quoted and substituted into ``chUrl``. The request
    is attempted at most twice: if the first attempt fails, it is repeated
    once and any error from the second attempt propagates to the caller.

    Returns whatever ``json.loads`` produces for the response body.
    """
    request = urllib2.Request(chUrl % urllib.quote(query))

    def fetch():
        # Close the response explicitly instead of leaving the socket to GC.
        response = urllib2.urlopen(request)
        try:
            return response.read().rstrip()
        finally:
            response.close()

    try:
        body = fetch()
    except Exception:
        # Single retry. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit abort immediately instead of
        # triggering a second request.
        body = fetch()

    return json.loads(body)

def graphiteFormat(metric_name, value):
    """Print one metric line in the form "<metric_name> <value> <timestamp>".

    The timestamp is the current Unix time minus ``delay`` seconds.
    """
    ts = int(time.time()) - delay
    # Single-argument parenthesised form: prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print("%s %s %s" % (metric_name, value, ts))

def _accessLogPrefix(group_by, vhost_name, fqdn_suffix, group_value):
    """Return the graphite metric prefix for one result row.

    Returns None when ``group_by`` is neither 'dc' nor 'hostname'; callers
    skip such rows, so no metric is emitted for them.
    """
    if group_by == 'dc':
        return "one_min.%s-%s_%s.access_log" % (vhost_name, group_value.lower(), fqdn_suffix)
    if group_by == 'hostname':
        return "one_min.%s.access_log" % (group_value.replace('.', '_'),)
    return None


def showTimings(vhost_mask, vhost_name, fqdn_suffix='yandex_ru', table='ppclogpusher.logs', group_by='dc'):
    """Query ClickHouse for one vhost and print its access-log metrics.

    Three queries are issued, each restricted by ``commonLimits`` and by
    ``vhost like vhost_mask`` and grouped by the ``group_by`` column:
    request rate (count()/60), request count per ``code``, and time
    quantiles. Every result row is printed through ``graphiteFormat``.

    Args:
        vhost_mask: pattern used in the ``vhost like '...'`` condition.
        vhost_name: name used in metric names when grouping by 'dc'.
        fqdn_suffix: suffix used in metric names when grouping by 'dc'.
        table: table to select from.
        group_by: column to group by. Metrics are emitted only for 'dc' and
            'hostname'; with 'dc' the overall (totals) quantiles are
            emitted as well.
    """
    prcs = [0.5, 0.6, 0.9, 0.95, 0.98, 0.99, 1]
    # round() before int() so a float product such as 0.57 * 100 can never be
    # truncated to the wrong label if the quantile list is extended.
    prc_labels = [int(round(prc * 100)) for prc in prcs]

    query = """select lower(%s) as dc1,count()/60 as rps from %s where
                %s and
                vhost like '%s'
                group by dc1 format JSON""" % (group_by, table, commonLimits, vhost_mask)
    log_debug('rps by ds: ' + query)

    for row in getJsonFromCh(query)['data']:
        prefix = _accessLogPrefix(group_by, vhost_name, fqdn_suffix, row['dc1'])
        if prefix is not None:
            graphiteFormat("%s.rps" % (prefix,), row['rps'])

    # Note: the "rps" column here is a plain count() over the window; it is
    # reported under the response code as the metric's last component.
    query = """select lower(%s) as dc1,code,count() as rps from %s where
               %s and
               vhost like '%s'
               group by dc1,code format JSON""" % (group_by, table, commonLimits, vhost_mask)
    log_debug('http codes by ds: ' + query)

    for row in getJsonFromCh(query)['data']:
        prefix = _accessLogPrefix(group_by, vhost_name, fqdn_suffix, row['dc1'])
        if prefix is not None:
            graphiteFormat("%s.%s" % (prefix, row['code']), row['rps'])

    query = """select lower(%s) as dc1, quantiles(%s)(time) as timings from %s where
                %s and
                vhost like '%s'
                group by dc1 with totals format JSON""" % (group_by, ",".join([str(x) for x in prcs]), table, commonLimits, vhost_mask)
    log_debug('timings by ds: ' + query)

    data = getJsonFromCh(query)
    for row in data['data']:
        prefix = _accessLogPrefix(group_by, vhost_name, fqdn_suffix, row['dc1'])
        if prefix is None:
            continue
        for (label, prc_timing) in zip(prc_labels, row['timings']):
            graphiteFormat("%s.timings.%s" % (prefix, label), prc_timing)

    # Overall quantiles are reported only for the per-dc grouping.
    if group_by != 'dc':
        return

    # Totals may be absent if there were no requests. Only the lookup is
    # guarded, so unrelated failures (e.g. while printing) are not hidden.
    try:
        total_timings = zip(prc_labels, data['totals']['timings'])
    except (KeyError, TypeError):
        return

    for (label, prc_timing) in total_timings:
        graphiteFormat("one_min.%s_%s.access_log.timings.%s" % (vhost_name, fqdn_suffix, label), prc_timing)


if __name__ == '__main__':
    # One entry per balancer: (vhost_mask, vhost_name, extra showTimings kwargs).
    # To add a balancer, append a tuple here.
    balancers = [
        ('direct-mod.yandex-team.ru', 'direct-mod', {}),
        ('intapi.direct-mod.yandex.ru', 'direct-mod-intapi', {}),
        ('partner2.yandex.ru', 'partner2', {}),
        ('distribution.yandex.ru', 'distribution', {}),
        ('back.advq.yandex.ru', 'back_advq', {}),
        ('back-test.advq.yandex.ru', 'back_advq_test', {}),
        ('forecast.advq.yandex.ru', 'forecast_advq', {}),
        ('whale-http.yandex.net', 'whale-http', {'fqdn_suffix': 'yandex_net'}),
        ('advquick.yandex.ru', 'advquick', {}),
        ('wordstat.advq.yandex.ru', 'wordstat_advq', {}),
        ('catmedia.yandex.ru', 'catmedia', {}),
        ('bmprod-http.yandex.ru', 'bmprod-http', {}),
        ('catalogia-mod.yandex.ru', 'catalogia-mod', {}),
        ('catalogia-mod-rt.yandex.net', 'catalogia-mod-rt', {'fqdn_suffix': 'yandex_net'}),
        ('bscatalogia.yandex.ru', 'bscatalogia', {}),
        # NOTE(review): this vhost is under yandex.net but uses the default
        # 'yandex_ru' suffix, unlike the other *.yandex.net entries - confirm
        # whether that is intentional (changing it renames the metrics).
        ('catalogia-phrases-rt.yandex.net', 'catalogia-phrases-rt', {}),
        ('bmapi.yandex.ru', 'bmapi', {}),
        ('bmwordstat.yandex.ru', 'bmwordstat', {}),
        ('catalogia-context.yandex.ru', 'catalogia-context', {}),
        # NOTE(review): same yandex.net / 'yandex_ru' mismatch as above - confirm.
        ('catalogia-context-rt.yandex.net', 'catalogia-context-rt', {}),
    ]

    # All balancers are reported per dc first, then per hostname.
    for field in ('dc', 'hostname'):
        for (mask, name, extra) in balancers:
            showTimings(mask, name, group_by=field, **extra)
