import pandas as pd
import numpy as np
import ujson
import yt.wrapper as yt


def records_iter(df: pd.DataFrame, chunk_size: int = 10_000):
    """Auxiliary function used in upload_df_to_yt.

    Yields the rows of ``df`` one by one as plain dicts (column name -> value)
    obtained through a pandas JSON round-trip.

    The frame is serialized chunk by chunk with ``orient="records"`` rather
    than row by row via ``iterrows()``: ``iterrows()`` turns every row into a
    single-dtype Series, so integer columns would be emitted as floats
    whenever the frame is all-numeric and contains a float column.

    Args:
        df: frame whose rows should be emitted.
        chunk_size: number of rows serialized per JSON round-trip; must be
            positive. Bounds the size of the intermediate JSON string.

    Yields:
        dict: one record per row, in row order.
    """
    for start in range(0, len(df), chunk_size):
        chunk = df.iloc[start:start + chunk_size]
        # to_json on the frame keeps each column's own dtype.
        yield from ujson.loads(chunk.to_json(orient="records"))


def wmape(y: np.ndarray, yhat: np.ndarray) -> float:
    """Auxiliary function used in forecast.

    Computes the weighted mean absolute percentage error:
    ``sum(|yhat - y|) / sum(|y|)``.

    The denominator uses absolute actuals so that negative values in ``y``
    cannot cancel positive ones (or flip the sign of the metric). For
    non-negative actuals the result is the same as dividing by ``sum(y)``.

    Args:
        y: actual values (NumPy array or pandas Series; a scalar also works).
        yhat: predicted values, same shape as ``y``.

    Returns:
        float: the WMAPE value. No special handling is done when the actuals
        sum to zero; NumPy division semantics apply in that case.
    """
    numerator = np.sum(np.abs(yhat - y))
    denominator = np.sum(np.abs(y))
    return numerator / denominator


def generate_forecast_model_id(forecast_params: dict) -> str:
    """Build a compact identifier string from forecast parameters.

    The identifier concatenates, separated by underscores: the first three
    characters of the Prophet ``growth`` and ``seasonality_mode`` settings,
    the ``changepoint_prior_scale``, a ``log1``/``log0`` flag, an
    ``out1``/``out0`` flag, the outlier z-value suffixed with ``z`` and the
    forecast cap suffixed with ``cap``.

    Args:
        forecast_params: dict with a nested ``prophet`` dict (keys ``growth``,
            ``seasonality_mode``, ``changepoint_prior_scale``) and top-level
            keys ``is_log_y``, ``is_delete_outliers``, ``outlier_z_value``,
            ``forecast_cap``.

    Returns:
        str: the model identifier.
    """
    prophet_params = forecast_params["prophet"]

    growth_tag = prophet_params["growth"][0:3]
    seasonality_tag = prophet_params["seasonality_mode"][0:3]
    changepoint_scale = prophet_params["changepoint_prior_scale"]

    if forecast_params["is_log_y"]:
        log_tag = "log1"
    else:
        log_tag = "log0"

    if forecast_params["is_delete_outliers"]:
        outlier_tag = "out1"
    else:
        outlier_tag = "out0"

    z_value = forecast_params["outlier_z_value"]
    cap_value = forecast_params["forecast_cap"]

    return "{}_{}_{}_{}_{}_{}z_{}cap".format(
        growth_tag,
        seasonality_tag,
        changepoint_scale,
        log_tag,
        outlier_tag,
        z_value,
        cap_value,
    )


def get_yt_client(proxy: str, token: str) -> yt.YtClient:
    """Create a YT client for the given cluster and token.

    Args:
        proxy: YT cluster name
        token: YT token from vault

    Returns:
        YtClient: client constructed with the given ``proxy`` and ``token``
    """
    connection_kwargs = {"proxy": proxy, "token": token}
    return yt.YtClient(**connection_kwargs)


def upload_df_to_yt(client, df: pd.DataFrame, config: dict, is_append: bool = False) -> None:
    """Write a DataFrame into a YT table, creating the table if needed.

    Args:
        client: YT client exposing ``create`` and ``write_table``.
        df: data to upload.
        config: dict with keys ``table_path`` (destination table),
            ``yt_scheme`` (schema used when creating the table),
            ``columns_type`` (mapping passed to ``DataFrame.astype``) and
            ``columns_new_names`` (mapping passed to ``DataFrame.rename``).
        is_append: passed as ``append`` to ``yt.TablePath``.
    """
    table_path = config["table_path"]

    # ignore_existing=True: an already existing table is not an error.
    client.create(
        type="table",
        path=table_path,
        attributes={"schema": config["yt_scheme"]},
        ignore_existing=True,
    )

    # Cast first, then rename: ``columns_type`` refers to the original names.
    prepared = df.astype(config["columns_type"]).rename(
        columns=config["columns_new_names"]
    )

    destination = yt.TablePath(table_path, append=is_append)
    client.write_table(destination, records_iter(prepared))

    # NOTE: the printed link hard-codes the ``hahn`` cluster segment.
    print(f"Upload to https://yt.yandex-team.ru/hahn/navigation?path={config['table_path']}")


def load_actual(path, client) -> pd.DataFrame:
    """Read a YT table into a DataFrame.

    Args:
        path: table path passed to ``yt.read_table``.
        client: YT client used for the read.

    Returns:
        pd.DataFrame: frame built from the list of rows read from the table.
    """
    rows = list(yt.read_table(path, client=client))
    return pd.DataFrame(rows)


def preprocess(df, config):
    """Rename columns, parse dates and filter the actuals before forecasting.

    Steps, in order:
      1. Rename columns using ``config["columns_new_names"]``; the result is
         expected to contain ``ds`` and ``group`` columns.
      2. Parse ``ds`` with the format ``config["ds_column_format"]``.
      3. Keep only groups whose latest ``ds`` is fewer than
         ``config["min_last_days_should_be"]`` whole days before now.
      4. Drop rows later than midnight of the latest remaining day.
      5. If ``config["is_step_back"]`` is truthy, keep only rows strictly
         earlier than the latest ``ds`` minus ``config["step_back_days"]`` days.

    Args:
        df: raw actuals.
        config: dict with the keys listed above.

    Returns:
        The filtered frame; the input frame is not modified.
    """
    data = df.rename(columns=config["columns_new_names"])
    data["ds"] = pd.to_datetime(data["ds"], format=config["ds_column_format"])

    # Latest observation per group and its age in whole days.
    latest = data.groupby("group", as_index=False)["ds"].max()
    latest["no_data_days"] = (pd.Timestamp.now() - latest["ds"]).dt.days

    # Inner merge against the recent groups drops the stale ones.
    is_recent = latest["no_data_days"] < config["min_last_days_should_be"]
    data = data.merge(latest.loc[is_recent, "group"], on="group")

    last_day_start = data["ds"].max().floor("D")
    data = data.loc[data["ds"] <= last_day_start]

    if config["is_step_back"]:
        cutoff = data["ds"].max() - pd.Timedelta(config["step_back_days"], "days")
        data = data.loc[data["ds"] < cutoff]

    row_count = data.shape[0]
    series_count = len(data["group"].unique())
    print(f"In actual {row_count} rows for {series_count} time series")

    return data


def delete_outliers(df: pd.DataFrame, window: int, threshold: float) -> pd.DataFrame:
    """Blank out upper outliers of the ``y`` column using a rolling z-score.

    For every row the z-score of ``y`` is computed against the rolling mean
    and rolling standard deviation over ``window`` rows. Rows whose z-score
    exceeds ``threshold`` get ``y`` replaced by a missing value; rows are
    never removed. Only positive deviations are flagged (``z > threshold``).

    The input frame is left untouched: the result is a new frame.

    Args:
        df: frame with a numeric ``y`` column.
        window: rolling window size, in rows.
        threshold: z-score above which a value is treated as an outlier.

    Returns:
        pd.DataFrame: copy of ``df`` with outlier ``y`` values set to NaN and
        without a ``z`` column.
    """
    y = df["y"]
    rolling = y.rolling(window)

    # The first ``window - 1`` rows have no full window; back-fill them with
    # the statistics of the first complete window.
    rolling_mean = rolling.mean().bfill()
    rolling_std = rolling.std().ffill().bfill()

    z_score = (y - rolling_mean) / rolling_std
    # NaN z-scores (e.g. zero standard deviation) compare as False here.
    is_outlier = z_score > threshold

    # Rows are kept, so count the flagged values rather than a row difference.
    print(f"Number of outliers = {int(is_outlier.sum())}")

    # assign() works on a copy, so the caller's frame is not modified. A
    # pre-existing "z" column is dropped to keep the returned columns as before.
    result = df.assign(y=y.mask(is_outlier))
    return result.drop(columns="z", errors="ignore")


def seasonal_naive(y: pd.Series, season_length: int, horizon: int) -> pd.Series:
    """Forecast by repeating the last observed season.

    Args:
        y: observed values.
        season_length: number of trailing observations forming one season.
        horizon: number of steps to forecast.

    Returns:
        pd.Series: ``horizon`` values on a 0-based integer index. If ``y`` has
        fewer than ``season_length`` observations, a message is printed and a
        series of zeros is returned instead.
    """
    if len(y) < season_length:
        print("Not enough data for seasonal forecast")
        return pd.Series(0, index=range(horizon))

    season_template = y.iloc[-season_length:]
    # Repeat the season enough times to cover the horizon, then trim.
    repeats = int(np.ceil(horizon / season_length))
    repeated = np.tile(season_template, repeats)
    return pd.Series(repeated[:horizon])
