#!/usr/bin/env python2
# -*- encoding: utf-8 -*-

import json
import os
import time
import urllib
import urllib2

# Connection and timing parameters; the script entry point passes them as
# keyword arguments to the query and output functions.
SETTINGS = {
    'host': 'ppclogpusher-storage.direct.yandex.net',
    'port': 8123,
    'user': 'readonly',
    'password': '',
    'timeout': 5,
    # Allow for the fact that data does not show up in ClickHouse right away.
    'delay': 120,
}

def debug(msg):
    '''Print msg with a "DEBUG: " prefix when the DEBUG environment variable is set.

    Nothing is printed when DEBUG is unset or empty. The message is written
    to standard output, the same stream the metrics are printed to.
    '''
    if os.environ.get('DEBUG'):
        # Single-argument print(...) behaves the same as the print statement.
        print('DEBUG: {0}'.format(msg))

def graphiteFormat(metric_name, value, delay=0):
    '''Print one "<metric_name> <value> <timestamp>" line to standard output.

    The timestamp is the current Unix time in whole seconds minus delay.
    '''
    timestamp = int(time.time()) - delay
    line = "%s %s %s" % (metric_name, value, timestamp)
    print(line)

def requestClickhouse(query, **kwargs):
    '''Send query to the ClickHouse HTTP interface and return the parsed JSON reply.

    The response body is passed to json.loads, so the query has to ask the
    server for JSON output (for example by ending with FORMAT JSON).

    Keyword arguments:
        host     -- server host name (default 'localhost')
        port     -- HTTP port (default '8123')
        user     -- user name; left out of the URL when None
        password -- password; left out of the URL when empty or None
        timeout  -- timeout in seconds for each attempt (default 60)
    Other keyword arguments are ignored.

    The request is attempted up to three times with a one second pause
    between attempts. If every attempt fails, the exception from the last
    attempt is re-raised. ValueError is raised when the body is not valid
    JSON.
    '''
    attempts = 3
    debug(query)
    host = kwargs.get('host', 'localhost')
    port = kwargs.get('port', '8123')
    user = kwargs.get('user', None)
    password = kwargs.get('password', None)
    timeout = kwargs.get('timeout', 60)

    address = 'http://{0}:{1}/?query={2}'.format(host, port, urllib.quote(query))
    if user is not None:
        # Percent-encode the value so characters such as '&' cannot break the URL.
        address = '{0}&user={1}'.format(address, urllib.quote(str(user), safe=''))
    # Log the URL before the password is appended so it never reaches the output.
    debug(address)
    if password:
        address = '{0}&password={1}'.format(address, urllib.quote(str(password), safe=''))
    request = urllib2.Request(address)

    response = None
    for attempt in range(attempts):
        try:
            handle = urllib2.urlopen(request, timeout=timeout)
            try:
                response = handle.read().rstrip()
            finally:
                handle.close()
            break
        except Exception as err:
            debug(err)
            if attempt == attempts - 1:
                # Out of retries: surface the real failure instead of
                # handing an empty placeholder to json.loads.
                raise
            time.sleep(1)
    debug(response)
    return json.loads(response)


def getPtkillCount(**kwargs):
    '''Return a dict mapping each instance to its row count for a one-minute window.

    The full list of instances is fetched first, because the counting query
    returns nothing for an instance that has no rows in the window. Every
    known instance starts at zero and is then overwritten with the value from
    the counting query where one exists.

    The window is one minute long, aligned to the start of a minute, and
    shifted into the past by the 'delay' keyword argument (default 0). All
    keyword arguments are also forwarded to requestClickhouse.
    '''
    delay = kwargs.get('delay', 0)
    instances_query = """SELECT instance FROM ppclogpusher.mysql_ptkill GROUP BY instance FORMAT JSON"""
    counts_query = """SELECT instance, count(*) AS cnt FROM ppclogpusher.mysql_ptkill
                    WHERE (day = today() or day = yesterday()) AND 
                           date >= toStartOfMinute(toDateTime(now() - 60 - {0})) AND 
                           date < toStartOfMinute(toDateTime(now() - {0})) 
                    GROUP BY instance FORMAT JSON""".format(delay)

    result = {}
    # Seed every known instance with zero; empty rows are skipped.
    for row in requestClickhouse(instances_query, **kwargs)['data']:
        if row:
            result[row['instance']] = 0
    # Overwrite the zero wherever the window actually contains rows.
    for row in requestClickhouse(counts_query, **kwargs)['data']:
        if row:
            result[row['instance']] = row['cnt']
    return result

def printGraphite(data, **kwargs):
    '''Print one Graphite line per key in data.

    Each key is inserted into the metric name
    'one_min.mysql_<key>.ptkill.count' and the value stored under that key is
    printed as the metric value. The 'delay' keyword argument (default 0) is
    handed to graphiteFormat; other keyword arguments are ignored.
    '''
    shift = kwargs.get('delay', 0)
    name_for = 'one_min.mysql_{0}.ptkill.count'.format
    for instance in data:
        graphiteFormat(name_for(instance), data[instance], shift)

if __name__ == '__main__':
    # Collect the per-instance counts, then print them as Graphite lines.
    printGraphite(getPtkillCount(**SETTINGS), **SETTINGS)
