#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Визуализация использования дискового пространства шардированными базами
Генерирует svg в текущей директории
"""

import json
import time
import datetime
import requests
import urllib
import urlparse
import numpy as np
import math
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as tkr


def url_fix(s, charset='utf-8'):
    """Percent-encode the path and query string of a URL.

    The path is quoted with ``/`` and ``%`` kept as-is (so already
    percent-encoded paths are not double-encoded); the query string is
    quoted with ``quote_plus`` keeping ``:``, ``&`` and ``=`` intact, which
    means spaces become ``+`` and characters such as ``(``, ``)`` and ``*``
    are percent-encoded.  Scheme, host and fragment are passed through.

    Works on both Python 2 and Python 3: the quoting helpers are resolved
    locally because their module layout differs between the two.

    Args:
        s: URL as text or bytes.
        charset: encoding used to turn non-ASCII text into bytes before
            quoting; characters that cannot be encoded are dropped.

    Returns:
        The URL as a native ``str`` with path and query quoted.
    """
    try:
        # Python 2 module layout.
        from urllib import quote, quote_plus
        from urlparse import urlsplit, urlunsplit
        quote_kwargs = {}
        if isinstance(s, unicode):  # noqa: F821 - only reached on Python 2
            s = s.encode(charset, 'ignore')
    except ImportError:
        # Python 3: quoting works on text, the charset is passed to quote().
        from urllib.parse import quote, quote_plus, urlsplit, urlunsplit
        quote_kwargs = {'encoding': charset, 'errors': 'ignore'}
        if isinstance(s, bytes):
            s = s.decode(charset, 'ignore')

    scheme, netloc, path, qs, anchor = urlsplit(s)
    path = quote(path, '/%', **quote_kwargs)
    qs = quote_plus(qs, ':&=', **quote_kwargs)
    return urlunsplit((scheme, netloc, path, qs, anchor))

def merge_graph_data(url_template, shards):
    """Fetch one series per shard and sum them timestamp by timestamp.

    For every shard id the ``{}`` placeholder in ``url_template`` is filled
    in via ``str.format`` and the URL is requested.  The response is parsed
    as JSON and the ``datapoints`` list of its first element is read as
    ``[value, timestamp]`` pairs.  Pairs with a falsy value (``None`` or 0)
    are ignored.

    Args:
        url_template: URL containing a ``{}`` placeholder for the shard id.
        shards: iterable of shard ids.

    Returns:
        A list of ``[total, timestamp]`` pairs sorted by timestamp, where
        ``total`` is the sum of the values of all shards at that timestamp.

    Raises:
        requests.HTTPError: if a shard request returns an error status.
    """
    # Materialize once: the ids are iterated for every timestamp below.
    shards = list(shards)
    values_by_shard = {}
    timestamps = set()
    for sh in shards:
        response = requests.get(url_template.format(sh))
        # Fail on HTTP errors here instead of on an unparsable body later.
        response.raise_for_status()
        series = {}
        for dp in json.loads(response.content)[0]['datapoints']:
            if dp[0]:
                series[dp[1]] = dp[0]
        values_by_shard[sh] = series
        timestamps.update(series)

    # A shard with no value at a timestamp contributes 0.  This is wrong
    # when there are "holes" in the data, but as long as there are not too
    # many of them it has little effect on the extrapolation.
    return [
        [sum([values_by_shard[sh].get(t, 0) for sh in shards]), t]
        for t in sorted(timestamps)
    ]


def draw_graph(basename, graphite_url, shards_now, final_date_Y_m, warn_shard_size, crit_shard_size, model='linear'):
    """Plot disk usage history with a fitted trend and save it as an SVG.

    Datapoints are read from ``<basename>.cache`` if that file can be
    opened; otherwise they are fetched from ``graphite_url`` and the cache
    file is written once the response has been parsed successfully.  The
    trend is fitted to the datapoints, extrapolated to ``final_date_Y_m``
    to estimate how many shards are needed to stay at ``warn_shard_size``
    per shard, and drawn until it reaches the critical size for that shard
    count.  The figure is saved in the current directory under a name built
    from ``basename`` and the parameters, with an ``.svg`` extension.

    Args:
        basename: name of the graph; prefix of the cache and SVG files.
        graphite_url: URL returning JSON whose first element has a
            ``datapoints`` list of ``[value, timestamp]`` pairs.
        shards_now: current number of shards.
        final_date_Y_m: target date as ``'YYYY-MM'``.
        warn_shard_size: warning threshold per shard, in bytes.
        crit_shard_size: critical threshold per shard, in bytes.
        model: ``'linear'`` (y = k*x + c) or ``'exponential'``
            (y = c * exp(k*x)); with any other value no trend is drawn.

    Raises:
        ValueError: if no datapoint with positive value and timestamp is
            available.
        requests.HTTPError: if the data request returns an error status.
    """
    cache_name = basename + '.cache'
    try:
        with open(cache_name, 'r') as f:
            content = f.read()
    except IOError:
        url = url_fix(graphite_url)
        print('no cache file, read data from ' + url)
        if basename == 'ppcdata-diskspace' and 'shard*' in graphite_url:
            # graphite cannot cope with shard*, so sum the shards manually
            data = merge_graph_data(graphite_url.replace('shard*', 'shard{}'), range(1, shards_now + 1))
            content = json.dumps([{'target': '...', 'datapoints': data}])
        else:
            response = requests.get(url)
            response.raise_for_status()
            content = response.content
            data = json.loads(content)[0]['datapoints']
        # Write the cache only after the data has been parsed, so that an
        # error response is never cached and re-read on every later run.
        with open(cache_name, 'w') as f:
            f.write(content)
    else:
        print('read data from cache file ' + cache_name)
        data = json.loads(content)[0]['datapoints']

    graph_name = basename + '.%02d_shards_now.%s.size_gb_%d_warn_%d_crit' % (shards_now, final_date_Y_m, int(warn_shard_size/(1024**3)), int(crit_shard_size/(1024**3)))
    print(graph_name)

    warn_size = shards_now * warn_shard_size
    crit_size = shards_now * crit_shard_size

    # Keep only points whose value and timestamp are both present and positive.
    data = [dp for dp in data if all(v is not None and v > 0 for v in dp)]
    if not data:
        raise ValueError('no usable datapoints for ' + basename)
    data.sort(key=lambda dp: dp[1])
    data = np.array(data)
    x, y = data[:, 1], data[:, 0]

    fig = plt.figure()

    # axes setup
    ax1 = fig.add_subplot(111)
    ax1.xaxis.set_minor_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax1.yaxis.set_minor_locator(tkr.AutoMinorLocator())

    ax1.set_xlabel('date')
    ax1.set_ylabel('bytes')
    ax1.ticklabel_format(style='sci', scilimits=(0, 3), axis='y')
    ax1.xaxis.grid(color='0.8', which='major', linestyle='dashed', linewidth=0.1, alpha=1.0)
    ax1.xaxis.grid(color='0.8', which='minor', linestyle='dashed', linewidth=0.1, alpha=1.0)
    fig.autofmt_xdate()

    ax2 = ax1.twinx()
    ax2.set_ylabel('warn_size full shards')
    ax2.yaxis.set_minor_locator(tkr.AutoMinorLocator())
    ax2.yaxis.grid(color='0.8', which='major', linestyle='dashed', linewidth=0.1, alpha=1.0)
    ax2.yaxis.grid(color='0.8', which='minor', linestyle='dashed', linewidth=0.1, alpha=1.0)

    # source datapoints
    ax1.plot_date(mdates.epoch2num(x), y, marker='', linestyle='-', linewidth=1.5)

    now = time.time()
    x_warn2 = time.mktime(datetime.datetime.strptime(final_date_Y_m, "%Y-%m").timetuple())
    days_interval_warn = int((x_warn2 - now) / 3600 / 24)
    # Used as-is when no trend model is applied.
    shards_add, warn_size2, crit_size2 = shards_now, warn_size, crit_size

    # Each model supplies: predict (bytes at time t), predict_shards (the
    # same in units of warn_shard_size), time_of_size (inverse of predict)
    # and trend_times (x values at which the trend line is drawn).
    predict = None
    if model == 'linear':
        # find the coefficients of y = k*x + c
        A = np.vstack([x, np.ones(len(x))]).T
        k, c = np.linalg.lstsq(A, y)[0]
        k_shards = k / warn_shard_size
        c_shards = c / warn_shard_size
        print("y (bytes) = %g * x (seconds) + %g" % (k, c))
        print("y (shards) = %g * x (seconds) + %g" % (k_shards, c_shards))

        def predict(t):
            return k * t + c

        def predict_shards(t):
            return k_shards * t + c_shards

        def time_of_size(size):
            return (size - c) / k

        def trend_times(start, stop):
            return np.array([start, stop])
    elif model == 'exponential':
        # y = c * exp(k * x)
        k, clog = np.polyfit(x, np.log(y), 1, w=np.sqrt(y))
        c = np.exp(clog)
        k_shards = k
        c_shards = c / warn_shard_size
        print("y (bytes) = %g * exp(%g * x (seconds))" % (c, k))
        print("y (shards) = %g * exp(%g * x (seconds))" % (c_shards, k_shards))

        def predict(t):
            return c * np.exp(k * t)

        def predict_shards(t):
            return c_shards * np.exp(k_shards * t)

        def time_of_size(size):
            return np.log(size / c) / k

        def trend_times(start, stop):
            # one point per week
            return np.arange(start, stop, 3600*24*7)

    if predict is not None:
        warn_size2 = predict(x_warn2)
        print("warn_size after %d days = %g" % (days_interval_warn, warn_size2))

        # number of shards (rounded to nearest) that keeps each shard at
        # warn_shard_size at the target date
        shards_warn2 = int(math.floor(warn_size2 / warn_shard_size + 0.5))
        shards_add = shards_warn2 - shards_now
        print("+%d shards to get %d gb per shard in %d days from now" % (shards_add, warn_shard_size / (1024**3), days_interval_warn))

        crit_size2 = shards_warn2 * crit_shard_size
        x_crit2 = time_of_size(crit_size2)
        # Measured from the current time, matching the "from now" message
        # (previously measured from the oldest datapoint).
        days_interval_crit = int((x_crit2 - now) / 3600 / 24)
        print("shards will reach a critical size in %d days from now" % (days_interval_crit,))

        # fitted line, from the first datapoint up to x_crit2
        x_trend = trend_times(x[0], x_crit2)
        # Drawn fully transparent (alpha=0.0); presumably only there so that
        # ax2 gets a scale in shard units matching ax1.
        ax2.plot_date(mdates.epoch2num(x_trend), predict_shards(x_trend), marker='', linestyle='-', linewidth=0.5, alpha=0.0)
        ax1.plot_date(mdates.epoch2num(x_trend), predict(x_trend), label=basename, marker='', linestyle='-', linewidth=2)

    # float() keeps the TB figures fractional when the sizes are ints on Python 2
    fig.suptitle('%s - %d shards now; calc for %d days; %.1f TB warn, %.1f TB crit per shard' % (basename, shards_now, days_interval_warn, float(warn_shard_size)/(1024**4), float(crit_shard_size)/(1024**4)))
    # horizontal limits
    ax1.axhline(y=warn_size, label='warn size (%d shards)' % (shards_now,), color='green', linestyle='--', linewidth=1.5)
    ax1.axhline(y=crit_size, label='crit size (%d shards)' % (shards_now,), color='red', linestyle='-.', linewidth=1.5)
    ax1.axhline(y=warn_size2, label='warn size (+%d shards)' % (shards_add,), color='gray', linestyle='--', linewidth=2)
    ax1.axhline(y=crit_size2, label='crit size (+%d shards)' % (shards_add,), color='gray', linestyle='-.', linewidth=2)

    ax1.legend(loc='upper left')
    fig.set_size_inches(11, 8.5)
    fig.savefig(graph_name + '.svg', dpi=300)
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(fig)

    print("")

if __name__ == '__main__':
    # Functions such as keepLastValue and the other Null-handling helpers
    # would distort everything beyond recognition.
    draw_graph(
        'ppcdata-diskspace',
        "http://ppcgraphite.yandex.ru/render/?format=json&target=sum(direct_one_min.db_configurations.production.objects.db.ppc.*.*_length.shard*)&from=-18months",
        shards_now=21,
        final_date_Y_m='2021-07',
        warn_shard_size=0.9*1024**4,
        crit_shard_size=1.2*1024**4,
        model='exponential'
    )

    #draw_graph('mod-mongo-diskspace', "http://ppcgraphite.yandex.ru/render/?format=json&target=scale(sumSeries(groupByNode(one_min.mongo???_mod_yandex_net.diskusage_{opt,local}.used, 1, 'maxSeries')), 0.33)&from=-8months", 20, '2019-07', 3*1024**4, 3.5*1024**4)

    draw_graph(
        'ppchouse-diskspace',
        "http://ppcgraphite.yandex.ru/render/?format=json&target=scale(sum(one_min.ppchouse0*.ClickHouse.compressed_bytes.total), 0.33)&from=-18months",
        shards_now=9,
        final_date_Y_m='2021-07',
        warn_shard_size=80*1024**4,
        crit_shard_size=100*1024**4
    )
