import logging
import time
from datetime import datetime, timedelta
from io import StringIO

from requests.exceptions import ConnectionError

from .settings.rtc_settings import GRAPHITE_URL, metrics


def get_services(service_prefix,
                 category_prefix,
                 only_services_names=False,
                 failover_retries_count=5,
                 date=None):
    """Return the service names reported by Graphite, heaviest first.

    The ``metrics['services']`` template is rendered with the two prefixes,
    the resulting CSV is fetched and rows without a value are dropped.
    Values are summed per service and the service names are returned sorted
    by that sum in descending order.

    Args:
        service_prefix: Substituted into the ``services`` metric template.
        category_prefix: Substituted into the ``services`` metric template.
        only_services_names: When true, strip the first and the last
            underscore-separated token from every service name and return
            the de-duplicated results (order is then unspecified).
        failover_retries_count: How many more attempts may be made after a
            failed one. Each retry sleeps one second and moves the queried
            date one day back.
        date: End date of the queried window. Defaults to the current date
            at call time.

    Returns:
        A list of service names.

    Raises:
        ConnectionError, ValueError: The error of the last attempt, once the
            retries are exhausted. An empty result is reported as
            ``ValueError("DataFrame is empty!")``.
    """
    logging.debug('RTC.get_services')
    import pandas as pd

    # Resolve the default per call: a ``datetime.today()`` default argument
    # is evaluated only once, when the module is imported.
    if date is None:
        date = datetime.today()

    def service_of(metric):
        # The service name is the dot-separated segment right before "cpu".
        parts = metric.split(".")
        return parts[parts.index("cpu") - 1]

    url = graphite_api_url(
        GRAPHITE_URL,
        metrics['services'].format(service_prefix=service_prefix, category_prefix=category_prefix),
        date
    )
    logging.info('Services url: ' + str(url))
    try:
        graphite_data = get_graphite_data(url)
        logging.info('Graphite data: ' + str(graphite_data))

        df = pd.read_csv(graphite_data, names=["metric", "ts", "value", "garbage"]).dropna(subset=['value'])
        logging.info('RTC data: ' + str(df))
        if df.empty:
            # Raised on purpose so that the handler below retries with an
            # earlier date.
            raise ValueError("DataFrame is empty!")

        df['service'] = df['metric'].apply(service_of)
        services = (
            df[['service', 'value']].groupby('service')
                                    .sum()
                                    .sort_values(by='value', ascending=False)
                                    .index
                                    .tolist()
        )

        if only_services_names:
            return list({"_".join(name.split("_")[1:-1]) for name in services})
        return services
    except (ConnectionError, ValueError):
        if failover_retries_count <= 0:
            # Bare raise keeps the original traceback.
            raise
        time.sleep(1)
        return get_services(
            service_prefix,
            category_prefix,
            only_services_names,
            failover_retries_count - 1,
            date - timedelta(days=1)
        )


def get_cpu_cores_per_instance(services,
                               category_prefix,
                               failover_retries_count=5,
                               date=None):
    """Return the average CPU value across the given services.

    The ``metrics['services_cores']`` template is rendered for ``services``,
    the CSV is fetched, and rows without a value are dropped. Values are
    averaged per service, and the mean of those per-service averages is
    returned.

    Args:
        services: Iterable of service names; they are joined into a
            ``{a,b,...}`` pattern for the query and used to filter the rows.
        category_prefix: Substituted into the metric template.
        failover_retries_count: How many more attempts may be made when the
            response contains no values. Each retry sleeps one second and
            moves the queried date one day back.
        date: End date of the queried window. Defaults to the current date
            at call time.

    Returns:
        The mean of the per-service averages, or ``None`` when no data was
        found and the retries are exhausted.
    """
    import pandas as pd

    # Resolve the default per call: a ``datetime.today()`` default argument
    # is evaluated only once, when the module is imported.
    if date is None:
        date = datetime.today()

    url = graphite_api_url(
        GRAPHITE_URL,
        metrics['services_cores'].format(
            category_prefix=category_prefix,
            services=''.join(['{', ','.join(services), '}'])),
        date
    )
    logging.info('Cpu cores url: ' + str(url))

    df = pd.read_csv(
        get_graphite_data(url),
        names=["metric", "ts", "value"]
    )

    df.dropna(subset=['value'], inplace=True)
    if df.empty:
        logging.info('DataFrame is empty!')
        if failover_retries_count <= 0:
            return None
        time.sleep(1)
        return get_cpu_cores_per_instance(
            services,
            category_prefix,
            failover_retries_count - 1,
            date - timedelta(days=1)
        )

    def service_of(metric):
        # The service name is the dot-separated segment right before "cpu".
        parts = metric.split(".")
        return parts[parts.index("cpu") - 1]

    df['service'] = df['metric'].apply(service_of)
    # Only the service and value columns take part in the aggregation, so
    # select them directly instead of mutating a filtered slice.
    selected = df.loc[df['service'].isin(services), ['service', 'value']]

    cores_per_instance = selected.groupby('service').mean()['value'].mean()
    logging.info('min_cpu_cores for services ' + str(services) + ': ' + str(cores_per_instance))
    return cores_per_instance


def get_memory_per_instance(services,
                            category_prefix,
                            failover_retries_count=5,
                            date=None):
    """Return the average memory value across the given services.

    The ``metrics['services_memory']`` template is rendered for
    ``services``, the CSV is fetched, and rows without a value are dropped.
    Values are averaged per service, and the mean of those per-service
    averages is returned.

    Args:
        services: Iterable of service names; they are joined into a
            ``{a,b,...}`` pattern for the query and used to filter the rows.
        category_prefix: Substituted into the metric template.
        failover_retries_count: How many more attempts may be made when the
            response contains no values. Each retry sleeps one second and
            moves the queried date one day back.
        date: End date of the queried window. Defaults to the current date
            at call time.

    Returns:
        The mean of the per-service averages, or ``None`` when no data was
        found and the retries are exhausted.
    """
    import pandas as pd

    # Resolve the default per call: a ``datetime.today()`` default argument
    # is evaluated only once, when the module is imported.
    if date is None:
        date = datetime.today()

    url = graphite_api_url(
        GRAPHITE_URL,
        metrics['services_memory'].format(category_prefix=category_prefix,
                                          services=''.join(['{', ','.join(services), '}'])),
        date
    )
    logging.info('Max memory url: ' + str(url))

    df = pd.read_csv(
        get_graphite_data(url),
        names=["metric", "ts", "value"]
    )

    df.dropna(subset=['value'], inplace=True)
    if df.empty:
        logging.info('DataFrame is empty!')
        if failover_retries_count <= 0:
            return None
        time.sleep(1)
        return get_memory_per_instance(
            services,
            category_prefix,
            failover_retries_count - 1,
            date - timedelta(days=1)
        )

    def service_of(metric):
        # The service name is the dot-separated segment right before "memory".
        parts = metric.split(".")
        return parts[parts.index("memory") - 1]

    df['service'] = df['metric'].apply(service_of)
    # Only the service and value columns take part in the aggregation, so
    # select them directly instead of mutating a filtered slice.
    selected = df.loc[df['service'].isin(services), ['service', 'value']]

    memory_per_instance = selected.groupby('service').mean()['value'].mean()
    logging.info('memory_per_instance for services ' + str(services) + ': ' + str(memory_per_instance))
    return memory_per_instance


def get_dc_minus_1_prod_instance_count(services,
                                       category_prefix,
                                       failover_retries_count=5,
                                       date=None):
    """Return the smallest per-service instance count times ``len(services) - 1``.

    The ``metrics['services_instances']`` template is rendered for
    ``services``, the CSV is fetched, and rows without a value are dropped.
    Values are averaged per service; the minimum of those averages is
    multiplied by ``len(services) - 1``.

    NOTE(review): the naming suggests every entry of ``services`` stands for
    one data center, so the result would be the capacity left with one data
    center down. Confirm against the callers.

    Args:
        services: Sized collection of service names; they are joined into a
            ``{a,b,...}`` pattern for the query and used to filter the rows.
        category_prefix: Substituted into the metric template.
        failover_retries_count: How many more attempts may be made when the
            response contains no values. Each retry sleeps one second and
            moves the queried date one day back.
        date: End date of the queried window. Defaults to the current date
            at call time.

    Returns:
        The computed count, or ``None`` when no data was found and the
        retries are exhausted.
    """
    import pandas as pd

    # Resolve the default per call: a ``datetime.today()`` default argument
    # is evaluated only once, when the module is imported.
    if date is None:
        date = datetime.today()

    url = graphite_api_url(
        GRAPHITE_URL,
        metrics['services_instances'].format(category_prefix=category_prefix,
                                             services=''.join(['{', ','.join(services), '}'])),
        date
    )
    logging.info('Instance count url: ' + str(url))

    df = pd.read_csv(
        get_graphite_data(url),
        names=["metric", "ts", "value"]
    )

    df.dropna(subset=['value'], inplace=True)

    if df.empty:
        logging.info('DataFrame is empty!')
        if failover_retries_count <= 0:
            return None
        time.sleep(1)
        return get_dc_minus_1_prod_instance_count(
            services,
            category_prefix,
            failover_retries_count - 1,
            date - timedelta(days=1)
        )

    def service_of(metric):
        # The service name is the dot-separated segment right before
        # "instance_count".
        parts = metric.split(".")
        return parts[parts.index("instance_count") - 1]

    df['service'] = df['metric'].apply(service_of)
    # Only the service and value columns take part in the aggregation, so
    # select them directly instead of mutating a filtered slice.
    selected = df.loc[df['service'].isin(services), ['service', 'value']]

    instance_count_per_dc = selected.groupby('service').mean()['value'].min()
    logging.info('min instance count per dc for services {}: {}'.format(services, instance_count_per_dc))
    dc_minus_1_count = len(services) - 1
    logging.info('dc_minus_1_count: {}'.format(dc_minus_1_count))
    return instance_count_per_dc * dc_minus_1_count


def get_graphite_data(url):
    """Fetch ``url`` and return the response body as a text stream.

    The request uses a 5 second timeout. The body is decoded with the
    encoding reported by the response; when none is reported it is decoded
    as UTF-8 with undecodable bytes ignored.

    Returns:
        A ``StringIO`` wrapping the decoded body.

    Raises:
        Exception: With ``(response text, status code)`` as arguments when
            the status code is not 200.
    """
    import requests

    response = requests.get(url, timeout=5.)
    if response.status_code != 200:
        raise Exception(response.text, response.status_code)

    payload = response.content
    if response.encoding:
        text = payload.decode(response.encoding)
    else:
        text = payload.decode('utf8', "ignore")
    return StringIO(text)


def graphite_api_url(API_URL, metric, date_to):
    """Render the Graphite URL template for a one-day window ending at ``date_to``.

    Args:
        API_URL: Format string with ``metric``, ``date_from`` and ``date_to``
            placeholders.
        metric: Value substituted for ``metric``.
        date_to: End of the window; the start is one day earlier. Both are
            rendered as ``YYYYMMDD``.

    Returns:
        The formatted URL string.
    """
    day_format = '%Y%m%d'
    window = {
        'date_to': date_to.strftime(day_format),
        'date_from': (date_to - timedelta(days=1)).strftime(day_format),
    }
    return API_URL.format(metric=metric, **window)


def get_service_rtc_data(category_prefix, service_prefix, service_filter=None):
    """Look up the services for the given prefixes and return their RTC data.

    Args:
        category_prefix: Passed to ``get_services`` and ``get_rtc_data``.
        service_prefix: Passed to ``get_services``.
        service_filter: Optional callable; when given it receives the
            discovered services and its return value is used instead.

    Returns:
        Whatever ``get_rtc_data`` returns for the selected services.
    """
    discovered = get_services(service_prefix=service_prefix, category_prefix=category_prefix)
    logging.info('RTC services before filter: ' + str(discovered))

    if service_filter is None:
        return get_rtc_data(discovered, category_prefix)

    selected = service_filter(discovered)
    logging.info('RTC services after filter: ' + str(selected))
    return get_rtc_data(selected, category_prefix)


def get_rtc_data(services, category_prefix):
    """Return ``(cpu cores per instance, memory per instance)`` for ``services``.

    The CPU lookup runs first, then the memory lookup; both receive the same
    ``services`` and ``category_prefix``.
    """
    logging.info('Ready to get RTC data for services: ' + str(services))
    lookup_args = dict(services=services, category_prefix=category_prefix)
    per_instance_cores = get_cpu_cores_per_instance(**lookup_args)
    per_instance_memory = get_memory_per_instance(**lookup_args)
    return per_instance_cores, per_instance_memory
