# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
try:
    import matplotlib.pyplot as plt
except ImportError:
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
from lib.constants import *


def get_pair(df, metric, key_cols=(cg,)):
    '''
    Select the key columns together with the usage/total pair of one metric.
    :param df: dataframe holding the ``<metric>_usage`` and ``<metric>_total`` columns
    :param metric: metric prefix used to build the two column names
    :param key_cols: tuple of columns placed in front of the metric pair
    :return: ``df`` restricted to ``key_cols`` followed by the two metric columns
    '''
    usage_col = '%s_usage' % metric
    total_col = '%s_total' % metric
    wanted = key_cols + (usage_col, total_col)
    return df[list(wanted)]


def smooth(df, window=7):
    '''
    Return a smoothed copy of ``df``; the input frame is not modified.

    NaN values are replaced with 0 first. Every column is then replaced by its
    rolling mean over ``window`` rows, and the gaps left by the window are
    filled by linear interpolation applied in the backward direction.
    Columns for which this computation fails are left as they are.
    :param df: dataframe to smooth
    :param window: size of the rolling window, in rows
    :return: new dataframe with the same columns as ``df``
    '''
    smoothed = df.copy().fillna(0)
    for column in smoothed.columns:
        try:
            smoothed[column] = (
                smoothed[column]
                .rolling(window)
                .agg('mean')
                .interpolate(method='linear', limit_direction='backward')
            )
        except Exception:
            # Best effort: a column that cannot be averaged is kept unchanged.
            # Only ``Exception`` is caught so that KeyboardInterrupt/SystemExit
            # still propagate.
            continue
    return smoothed


def expand_row_for_grow_percent(original_row, name, describe_dates, grow_percents):
    '''
    Build scaled variants of the selected rows, one variant per growth value.

    Rows of ``original_row`` whose index is in ``describe_dates`` are taken,
    their ``name`` column is dropped, and the remaining values are multiplied
    by ``1 + growth`` for every entry of ``grow_percents``.
    :param original_row: dataframe that contains a ``name`` column
    :param name: prefix used in the generated ``name`` labels
    :param describe_dates: index labels of the rows to keep
    :param grow_percents: growth values; a value greater than 1 is treated as a
        percentage and divided by 100, other values are used as fractions
    :return: the scaled variants stacked in the order of ``grow_percents``, each
        tagged with ``'<name>_<percent>%_grow'``; when ``grow_percents`` is empty
        the selected rows are returned without the ``name`` column
    :raises KeyError: if ``original_row`` has no ``name`` column
    '''
    selected = original_row[original_row.index.isin(describe_dates)]
    base = selected.drop('name', axis=1)

    variants = []
    for gp in grow_percents:
        if gp > 1:
            gp /= 100.
        # Always scale the untouched base rows: scaling the previous variant
        # would compound the growth and multiply the string ``name`` column.
        scaled = base.mul(1 + gp, fill_value=0)
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would turn into a "28%" label.
        scaled['name'] = '{}_{}%_grow'.format(name, int(round(gp * 100)))
        variants.append(scaled)

    if not variants:
        return base
    return pd.concat(variants)


def expand_row_for_grow_percent_cumulative(df, name, start_date, describe_dates, grow_percents):
    '''
    Build scaled variants of the running maximum of ``df``.

    The frame is sliced from ``start_date`` onwards, replaced by its cumulative
    maximum along the rows, and restricted to the rows whose index is in
    ``describe_dates``. Those rows are multiplied by ``1 + growth`` for every
    entry of ``grow_percents``.
    :param df: dataframe to take the running maximum of
    :param name: prefix used in the generated ``name`` labels
    :param start_date: first index label taken into account
    :param describe_dates: index labels of the rows to keep
    :param grow_percents: growth values; a value greater than 1 is treated as a
        percentage and divided by 100, other values are used as fractions
    :return: the scaled variants stacked in the order of ``grow_percents``, each
        tagged with ``'<name>_max_+<percent>%_grow'``; an empty dataframe when
        ``grow_percents`` is empty
    '''
    running_max = df[start_date:].cummax(axis=0)
    base = running_max[running_max.index.isin(describe_dates)]

    variants = []
    for gp in grow_percents:
        if gp > 1:
            gp /= 100.
        scaled = base.mul(1 + gp, fill_value=0)
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would turn into a "28%" label.
        scaled['name'] = '{}_max_+{}%_grow'.format(name, int(round(gp * 100)))
        variants.append(scaled)

    if not variants:
        return pd.DataFrame()
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    return pd.concat(variants)


def cum_max(df, name, describe_dates):
    '''
    Running maximum of ``df`` restricted to the requested index labels.
    :param df: dataframe to take the cumulative maximum of (along the rows)
    :param name: prefix used in the generated ``name`` label
    :param describe_dates: index labels of the rows to keep
    :return: new dataframe with the selected rows of the running maximum and a
        ``name`` column set to ``'<name>_cumulative_max'``
    '''
    running = df.cummax(axis=0)
    keep = running.index.isin(describe_dates)
    label = '{}_cumulative_max'.format(name)
    return running[keep].assign(name=label)


def concat_df_with_wau(_df, _wau_forecast_df, forecast_st_date, forecast_end_date,
                       last_days=20, days_quant=0.90, cummax_columns=(), wau_col='wau', drop_dup_index=False):
    '''
    Merge the real data with a forecast driven by the predicted audience coefficient.
    Meant to be replaced by a full predictive model in the future.

    The baseline of every column is the ``days_quant`` quantile of its last
    ``last_days`` rows. Columns whose name contains ``'usage'`` are forecast as
    that baseline multiplied by the audience relative to its first value in the
    forecast window; every other column keeps the baseline unchanged.
    :param _df: dataframe with the real data; it is not modified, and its ``cg``
        column (if present) is left out of the result
    :param _wau_forecast_df: dataframe with the daily/weekly audience the
        coefficients are built from
    :param forecast_st_date: first date of the forecast window
    :param forecast_end_date: last date of the forecast window
    :param last_days: number of trailing rows of ``_df`` used for the baseline
    :param days_quant: quantile taken over those trailing rows
    :param cummax_columns: columns whose forecast is replaced by its running maximum
    :param wau_col: name of the audience column in ``_wau_forecast_df``
    :param drop_dup_index: when true, keep only the first row for every
        duplicated index label of the result
    :return: the real data followed by the forecast rows
    '''
    forecast_st_date, forecast_end_date = pd.to_datetime((forecast_st_date, forecast_end_date))
    df = _df.copy()
    if cg in df.columns:
        df.drop(cg, axis=1, inplace=True)

    # Explicit copy of the forecast window, so the column assignments below
    # work on an independent frame.
    wau_df = _wau_forecast_df[[wau_col]][forecast_st_date:forecast_end_date].copy()
    last_values = df.iloc[-last_days:].quantile(days_quant)

    # Audience relative to the first day of the window. Computed once, from the
    # untouched audience column, as a flat array; ``.iloc[0]`` is explicitly
    # positional (the index holds dates, not integers).
    wau_growth = (wau_df[wau_col] / wau_df[wau_col].iloc[0]).values
    no_growth = np.ones(len(wau_df))

    for col in df.columns:
        coeffs = wau_growth if 'usage' in col else no_growth
        wau_df[col] = last_values[col] * coeffs
        if col in cummax_columns:
            wau_df[col] = wau_df[col].cummax()

    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    res = pd.concat([df, wau_df.drop(wau_col, axis=1)])
    if drop_dup_index:
        return res[~res.index.duplicated()]
    return res

def plot_predicted_df(df, forecast_start_date, ax, metric, prefix='pred_', grow_percents=()):
    '''
    Plot a predicted dataframe; the ``{metric}_usage`` and ``{metric}_total``
    columns are drawn.

    The frame is split at ``forecast_start_date``: the part up to that date is
    drawn under the plain column names, the part from that date on is drawn
    with ``prefix`` prepended to the labels and a dashed line for the usage.
    NaN values are plotted as 0.
    :param df: dataframe with the real and the predicted rows
    :param forecast_start_date: date separating real data from predicted data
    :param ax: matplotlib axes to draw on
    :param metric: metric prefix used to build the two column names
    :param prefix: label prefix of the predicted lines
    :param grow_percents: extra growth scenarios for the predicted usage; a
        value greater than or equal to 1 is treated as a percentage and divided
        by 100, smaller values are used as fractions
    :return: None
    '''
    forecast_start_date = pd.to_datetime(forecast_start_date)
    usage_metric = metric + '_usage'
    total_metric = metric + '_total'
    pdf = df[[usage_metric, total_metric]]
    pdf_real = pdf[:forecast_start_date].fillna(0)
    pdf_pred = pdf[forecast_start_date:].fillna(0)

    # Plotting data separated by prediction date
    pdf_real.plot(ax=ax, y=usage_metric, label=usage_metric)
    pdf_real.plot(ax=ax, y=total_metric, label=total_metric)
    pdf_pred.plot(ax=ax, y=usage_metric, label=prefix + usage_metric, linestyle='--')
    pdf_pred.plot(ax=ax, y=total_metric, label=prefix + total_metric)

    for gp in grow_percents:
        if gp >= 1:
            gp /= 100.
        gp_pred = pdf_pred * (1 + gp)
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would turn into a "28%" label.
        gp_label = '{}{}_{}%_grow'.format(prefix, usage_metric, int(round(gp * 100)))
        gp_pred.plot(ax=ax, y=usage_metric, label=gp_label, linestyle='--')

    ax.tick_params(axis='x', labelsize='small')
    ax.xaxis.label.set_visible(False)
    ax.set_ylim(0, None)
    ax.grid(linestyle='--', linewidth=0.5, )


def plot_df_by_cg(df, name, forecast_start_date, metrics=('cpu', 'disk', 'mem'), figsize=(24, 4), orders=None,
                  instant_plot=False, grow_percents=()):
    '''
    Plot a dataframe together with its predicted fields, one subplot per metric.
    :param df: prepared dataframe with the predicted data; it is not modified
    :param name: figure title
    :param forecast_start_date: date splitting the dataframe into real and predicted data
    :param metrics: metric prefixes to plot, one subplot each
    :param figsize: figure size passed to matplotlib
    :param orders: optional pre-orders in the format accepted by
        ``convert_preorders``; their running totals are added to the
        ``<metric>_total`` columns before plotting
    :param instant_plot: when true, ``plt.show()`` is called at the end
    :param grow_percents: growth scenarios forwarded to ``plot_predicted_df``
    :return: None
    '''
    # squeeze=False always yields a 2-D array of axes; without it a single
    # metric gives back a bare Axes object that cannot be indexed.
    fig, axes = plt.subplots(nrows=1, ncols=len(metrics), figsize=figsize, squeeze=False)
    fig.suptitle(name, fontsize=20, y=0.96)
    fig.autofmt_xdate()

    ndf = df.copy()

    orders_df = None
    if orders:
        # Build the order columns for exactly the plotted metrics.
        orders_df = convert_preorders(orders, ndf.index[0], ndf.index[-1], columns=tuple(metrics))

    for ax, m in zip(axes[0], metrics):
        if orders_df is not None:
            ndf[m + '_total'] += orders_df[m]
        plot_predicted_df(ndf, forecast_start_date, ax, m, grow_percents=grow_percents)
    if instant_plot:
        plt.show()


def filter_by_percentage(_df, group_col, perc_col, perc_value, comparator=lambda x, y: x >= y):
    '''
    Filter a dataframe by a column: with the default comparator a group is kept
    when its value is at least X percent of the sum of the whole column.

    The frame is summed per ``group_col``, averaged per ``cg``, and each row's
    share of the ``perc_col`` total is compared with ``perc_value`` percent.
    :param _df: dataframe to aggregate
    :param group_col: column(s) of the first grouping, whose groups are summed
    :param perc_col: column whose share of the total is evaluated
    :param perc_value: threshold, in percent
    :param comparator: callable ``(share, threshold)`` returning the row mask
    :return: the aggregated rows selected by ``comparator``
    '''
    perc_value /= 100.
    summed = _df.groupby(group_col).sum()
    per_cg = summed.groupby(cg).mean().reset_index(cg)

    values = per_cg[perc_col]
    share = values / values.sum()
    keep = comparator(share, perc_value)
    return per_cg[keep]


def convert_preorders(preorders, start_date, end_date, order_days_shift=28, skip_before_start=False,
                      columns=('cpu', 'disk', 'mem')):
    '''
    Turn orders into a non-decreasing dataframe that jumps on the dates of the orders.

    The result has one row per day from ``start_date`` to ``end_date`` and one
    column per entry of ``columns``. Each order adds its amounts to every row
    from its shifted date onwards.
    :param preorders: list of dicts [{date1: {metric1:v1, metric2:v2}}, ...];
        only the first item of each dict is used and empty dicts are skipped
    :param start_date: first day of the resulting index
    :param end_date: last day of the resulting index
    :param order_days_shift: number of days added to every order date (move to
        the end of month)
    :param skip_before_start: when true, orders whose shifted date lies before
        ``start_date`` are ignored; otherwise they raise the whole range
    :param columns: metrics to produce columns for; a metric missing from an
        order counts as 0
    :return: dataframe with the running order totals
    '''
    odf = pd.DataFrame(index=pd.date_range(start_date, end_date))
    for c in columns:
        odf[c] = 0
    # Timestamp form of the start date, so the comparison below also works
    # when the caller passes a string.
    start_ts = pd.to_datetime(start_date)
    for order in preorders:
        if not order:
            continue
        # next(iter(...)) works on Python 2 and 3; dict.items() is not
        # subscriptable on Python 3.
        dt, metrics = next(iter(order.items()))
        dt = pd.to_datetime(dt) + pd.DateOffset(order_days_shift)
        if skip_before_start and dt < start_ts:
            continue
        # The index never goes past end_date, so "on or after dt" selects the
        # same rows as the label slice dt:end_date.
        active = odf.index >= dt
        for c in columns:
            # Whole-column assignment instead of chained ``odf[c][...] +=``,
            # which does not reliably write back into the frame.
            odf[c] = odf[c] + np.where(active, metrics.get(c, 0), 0)
    return odf


def sum_df_by_names(_df, key_col, names):
    '''
    Sum, per index label, the rows whose ``key_col`` value is one of ``names``.
    :param _df: dataframe to filter and aggregate
    :param key_col: column matched against ``names``
    :param names: values of ``key_col`` to keep
    :return: dataframe with one row per index label of the kept rows
    '''
    selected = _df.loc[_df[key_col].isin(names)]
    return selected.groupby(by=selected.index).sum()

def describe_row(row, fields=('cpu_usage', 'cpu_total', 'disk_usage', 'disk_total')):
    '''
    Render a row as text: its name on the first line, then one line per field.
    :param row: object with a ``name`` attribute that is indexable by field name
    :param fields: fields to print, in order; fields the row does not have
        (``KeyError`` on lookup) are skipped
    :return: the text, every line terminated by a newline
    '''
    lines = ['{0!s}\n'.format(row.name)]
    for field in fields:
        try:
            value = row[field]
        except KeyError:
            continue
        lines.append('{:15} {:.1f}\n'.format(field, value))
    return ''.join(lines)

def set_col_first(df, col):
    '''
    Return ``df`` with column ``col`` moved to the first position.
    :param df: dataframe to reorder; it is not modified
    :param col: column to put first; the other columns keep their relative order
    :return: dataframe with the reordered columns
    :raises KeyError: if ``col`` is not a column of ``df``
    '''
    if col not in df:
        # Name the missing column so the failure is self-explanatory.
        raise KeyError(col)
    others = [c for c in df.columns if c != col]
    return df[[col] + others]
