#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Compute templating-time percentiles from trace logs in YT and send them to graphite.
"""

""" METADATA

<crontab>
    env: YT_DIRECT_CLUSTER=hahn
    time: 4 4 * * *
    params: calc_in_yt
    <switchman>
        lockname: templating_trace_procentiles.py.calc_in_yt.hahn
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<crontab>
    env: YT_DIRECT_CLUSTER=arnold
    time: 0 4 * * *
    params: calc_in_yt
    <switchman>
        lockname: templating_trace_procentiles.py.calc_in_yt.arnold
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<crontab>
    env: YT_DIRECT_CLUSTER=prod
    time: 30 5 * * *
    params: write_to_graphite
    <switchman>
        lockname: templating_trace_procentiles.py.write_to_graphite.prod
        group: scripts-other
    </switchman>
    package: scripts-switchman
</crontab>
<juggler>
    host:   checks_auto.direct.yandex.ru
    name:       scripts.templating_trace_procentiles.working
    raw_events: scripts.templating_trace_procentiles.working.$yt_cluster.calc_in_yt
    raw_events: scripts.templating_trace_procentiles.working.prod.write_to_graphite
    vars:       yt_cluster=hahn,arnold
    ttl: 2d2h
    tag:        direct_yt
    tag:        direct_group_internal_systems
</juggler>

"""

import argparse, logging
import sys, os
from datetime import datetime, date, timedelta
import time
import graphitesend

sys.path[0:0] = [os.path.realpath(os.path.join(os.path.dirname(__file__), ".."),)]

import settings
from yandex.juggler import juggler_queue_event
from direct.ytutils import run_simple_operation
import direct.tools

import yt.wrapper as yt
# Global tweaks of the YT client: switch off the tmpfs archive used by pickling
# and the progress bar shown while writing.
# NOTE(review): the reasons for these two switches are not visible in this file.
yt.config["pickling"]["enable_tmpfs_archive"] = False
yt.config["write_progress_bar"]["enable"] = False


# Default value of the --log command-line option.
DEFAULT_LOG_LEVEL = 'warn'
# Address and port passed to graphitesend.init() as the graphite server.
HOST = '127.0.0.1'
PORT = 42000
# Table written by calc_in_yt() and read back by write_to_graphite().
# NOTE(review): the path is relative; presumably it is resolved against a YT
# prefix configured elsewhere - confirm.
YT_RES_FILE = 'tmp/templating_trace_procentiles'
# Number of daily trace_log tables (today and the preceding days) fed to the map-reduce.
DAYS_AGO = 7
# Metric prefix passed to graphitesend.init().
PREFIX = 'direct_one_min.db_configurations.production.web.templating'

@yt.aggregator
def mapper(rows):
    """Pre-aggregate templating timings from trace-log rows into a histogram.

    Only rows that have a ``log_time`` and whose ``service`` is ``direct.web``
    or ``direct.perl.web`` are considered.  For every entry of the row's
    ``profile`` whose ``func`` is listed in ``FUNCS`` the elapsed time
    (``all_ela`` multiplied by 1000 and rounded) is put into a bucket and
    counted twice: once for the row's ``method`` and once for the synthetic
    method ``_all``.

    Bucketing of the scaled elapsed time:
        * up to 100                -> 100
        * below 1000               -> rounded down to a multiple of 10
        * below 30000              -> rounded down to a multiple of 100
        * below 600000             -> rounded down to a multiple of 1000
        * everything else          -> 600000

    Rows with an empty/null ``log_time`` or without a ``profile`` are skipped
    instead of aborting the whole job.

    :param rows: iterable of trace-log rows (dict-like objects).
    :return: generator of dicts with keys ``method``, ``dt`` (the hour as
        ``YYYY-MM-DD HH:MM:SS``), ``ela`` (bucket) and ``cnt``.
    """
    # Imports and constants live inside the function, as in the original;
    # presumably because the function body is shipped to YT on its own.
    from collections import defaultdict
    from datetime import datetime
    FUNCS = ('template:process_bem',)
    SERVICES = ('direct.web', 'direct.perl.web')

    cache = defaultdict(int)
    for row in rows:
        log_time = row.get('log_time')
        if not log_time or row.get('service') not in SERVICES:
            continue
        # Truncate the timestamp to the hour: "YYYY-MM-DD HH".
        hour_dt = datetime.strptime(log_time[0:13], "%Y-%m-%d %H")
        for fp in row.get('profile') or ():
            if fp['func'] not in FUNCS:
                continue
            ela = int(fp['all_ela'] * 1000 + 0.5)
            if ela <= 100:
                ela = 100
            elif ela < 1000:
                ela = ela // 10 * 10
            elif ela < 30000:
                ela = ela // 100 * 100
            elif ela < 600000:
                ela = ela // 1000 * 1000
            else:
                ela = 600000
            cache[(row['method'], hour_dt, ela)] += 1
            cache[('_all', hour_dt, ela)] += 1

    # items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    for (method, dt, ela), cnt in cache.items():
        yield {'method': method, 'dt': str(dt), 'ela': ela, 'cnt': cnt}


def reducer(grp, rows):
    """Turn the histogram of one (method, dt) group into metrics.

    Emits, in this order, the total count (``<method>.cnt``), the integer
    average bucket value (``<method>.avg``) and one ``<method>.proc_<p>``
    metric for every percentile in ``PROCS``.  Groups with fewer than
    5 observations produce nothing.

    Percentiles follow the nearest-rank definition: the p-th percentile is the
    smallest bucket such that at least p percent of the observations are less
    than or equal to it (rank = ceil(p * n / 100)).

    :param grp: reduce key, a mapping with ``method`` and ``dt``.
    :param rows: iterable of mapper outputs (mappings with ``ela`` and ``cnt``).
    :return: generator of dicts with keys ``dt``, ``name`` and ``value``.
    """
    import operator
    from collections import deque
    PROCS = [50, 80, 90, 95, 98, 99]  # must stay in ascending order

    rows = sorted(rows, key=operator.itemgetter('ela'))
    sum_cnt = sum(r['cnt'] for r in rows)

    # Too few observations to produce meaningful statistics.
    if sum_cnt < 5:
        return

    sum_elas = sum(r['cnt'] * r['ela'] for r in rows)

    def metric(name, value):
        full_name = grp['method'] + '.' + name
        return dict(dt=grp['dt'], name=full_name, value=value)

    yield metric('cnt', sum_cnt)
    # Floor division keeps the average an integer on every Python version.
    yield metric('avg', sum_elas // sum_cnt)

    # Queue of (rank, percentile); the rank is ceil(p * sum_cnt / 100) computed
    # in integers.  Walking the buckets in ascending order, a percentile is
    # emitted as soon as the cumulative count reaches its rank.
    pending = deque(((p * sum_cnt + 99) // 100, p) for p in PROCS)
    seen = 0
    for row in rows:
        seen += row['cnt']
        while pending and pending[0][0] <= seen:
            yield metric('proc_%d' % pending[0][1], row['ela'])
            pending.popleft()

def juggler_ok(action):
    """Queue an OK juggler event reporting that *action* has completed.

    The event name is built from the fixed script prefix, the value of the
    YT_DIRECT_CLUSTER environment variable ('prod' when unset) and the action
    name.

    :param action: name of the action that has just finished.
    """
    cluster = os.environ.get('YT_DIRECT_CLUSTER', 'prod')
    event_name = '.'.join([
        'scripts.templating_trace_procentiles.working',
        cluster,
        action,
    ])
    description = 'successfuly performed action: ' + action
    juggler_queue_event(event_name, 'OK', description)


def calc_in_yt():
    """Run the percentile map-reduce over the latest daily trace-log tables.

    The input is the trace_log tables of the last DAYS_AGO days (today
    included), restricted to the columns the mapper reads.  The result is
    written to YT_RES_FILE, which is then merged in place with
    ``combine_chunks``; finally an OK event is queued for juggler.
    """
    today = date.today()
    source_tables = []
    for offset in range(DAYS_AGO):
        day = (today - timedelta(days=offset)).strftime('%Y-%m-%d')
        source_tables.append(
            '//home/direct/logs/trace_log/%s{log_time,service,method,profile}' % day
        )

    run_simple_operation(
        yt.run_map_reduce,
        mapper,
        reducer,
        source_tables,
        YT_RES_FILE,
        reduce_by=['method', 'dt'],
    )

    merge_spec = {'combine_chunks': 'true'}
    yt.run_merge(YT_RES_FILE, YT_RES_FILE, spec=merge_spec)

    juggler_ok('calc_in_yt')


def write_to_graphite():
    """Send the metrics stored in YT_RES_FILE to graphite.

    Every row of the result table becomes a ``[name, value, timestamp]``
    triple; the timestamp is the row's ``dt`` interpreted in the local time
    zone.  Triples are sent in chunks of 100 with a short pause between
    chunks, after which an OK event is queued for juggler.
    """
    gsender = graphitesend.init(graphite_server=HOST, graphite_port=PORT, system_name='', prefix=PREFIX)

    data = []
    for row in yt.read_table(YT_RES_FILE, yt.YsonFormat('binary'), raw=False):
        hour_dt = datetime.strptime(row['dt'], "%Y-%m-%d %H:%M:%S")
        # time.mktime() is the portable way to get a local-time epoch;
        # strftime("%s") is an undocumented, platform-specific directive.
        ts = int(time.mktime(hour_dt.timetuple()))
        data.append([row['name'], int(row['value']), ts])

    chunk_size = 100
    for start in range(0, len(data), chunk_size):
        gsender.send_list(data[start:start + chunk_size])
        # Pause between chunks, presumably to avoid flooding the receiver.
        time.sleep(0.1)

    juggler_ok('write_to_graphite')

def argparser():
    """Build the command-line parser of the script.

    Options:
        --log     log level name, DEFAULT_LOG_LEVEL when omitted.
        actions   every remaining argument; the expected values are
                  'calc_in_yt' and 'write_to_graphite'.

    :return: the configured argparse.ArgumentParser.
    """
    known_actions = ['calc_in_yt', 'write_to_graphite']

    cli = argparse.ArgumentParser(
        description='Calc template timing percentiles in YT and send them to graphite',
    )
    cli.add_argument(
        '--log',
        type=str,
        default=DEFAULT_LOG_LEVEL,
        help='log level, default: ' + DEFAULT_LOG_LEVEL,
    )
    cli.add_argument(
        'actions',
        type=str,
        nargs=argparse.REMAINDER,
        choices=known_actions,
        help='actions',
    )
    return cli

if __name__ == '__main__':
    # Script entry point: parse the command line, set up logging and run the
    # requested actions in the order given.
    args = argparser().parse_args()

    direct.tools.set_logging(loglevel=args.log.upper(), add_info=os.environ.get('YT_PROXY', '-'))
    logging.warning("start")

    handlers = {
        'calc_in_yt': calc_in_yt,
        'write_to_graphite': write_to_graphite,
    }

    if not args.actions:
        # Nothing to do is a usage error: report it through the exit status
        # instead of finishing as if the run had succeeded.
        logging.error("No actions defined")
        sys.exit(1)

    # Validate every action before running any of them, so that a typo in a
    # later argument does not surface only after earlier actions have run.
    for action in args.actions:
        if action not in handlers:
            logging.error("incorrect action: %s", action)
            raise Exception("incorrect action: %s" % action)

    for action in args.actions:
        handlers[action]()

    logging.warning("finish")
