# -*- coding: utf-8 -*-

import pandas as pd
import logging
import os
import statface_client

from itertools import chain

from lib.collectors.yasm_collector import (yasmhwcollector,
                                           yasmqloudcollector,
                                           yasmpgcollector,
                                           yasmsharpeicollector,
                                           yasmmdbcollector,
                                           yasmdeploycollector)

# Configure the root logger when this module is imported: DEBUG level and
# "[time] LEVEL logger-name message" formatted lines.
logging.basicConfig(level=logging.DEBUG, format="[%(asctime)s] %(levelname)s %(name)s %(message)s")
# Module-level logger used by every extractor class in this file.
logger = logging.getLogger(__name__)

class YasmExtractor(object):
    """Base extractor: collect per-host stats, reshape them and upload to Statface.

    Subclasses assign ``self.collector`` after calling this constructor. The
    collector is used through two calls only:

    * ``get_all_hosts()`` -- must return a mapping ``{group: [host, ...]}``;
    * ``collect_stats(host, date)`` -- must return a mutable mapping holding
      at least the ``date``, ``fqdn`` and ``calc_columns`` keys used by
      :meth:`parse`.
    """

    # Source name substituted into the "Collecting ... hosts" log message.
    LOG_NAME = ''

    def __init__(self, specific_hosts=None, ignore_empty=True, *args, **kwargs):
        """Create the Statface client and remember the filtering options.

        :param specific_hosts: optional container of hosts; when truthy, only
            these hosts (and the groups that still contain any of them) are kept.
        :param ignore_empty: when true, falsy values are removed from every
            record produced by :meth:`parse`.
        :raises EnvironmentError: if the ``STAT_TOKEN`` environment variable is
            missing or empty.
        """
        # Validate the token first so a misconfigured environment fails
        # immediately instead of after a client/report has been requested.
        self.stat_token = os.getenv('STAT_TOKEN')
        if not self.stat_token:
            raise EnvironmentError('STAT_TOKEN variable is not set')

        self.collector = None
        self.stat_client = statface_client.StatfaceClient(
            oauth_token=self.stat_token, host=statface_client.STATFACE_PRODUCTION)
        self.stat_report = self.stat_client.get_report('Disk/Admin/Resources')
        self._hosts = None
        self.specific_hosts = specific_hosts
        self.ignore_empty = ignore_empty

        # Metric columns that are kept by parse() and zero-filled when missing.
        self.calc_columns = [
            'cpu_usage', 'cpu_total',
            'disk_usage', 'disk_total',
            'mem_usage', 'mem_total',
        ]

    def upload(self, data):
        """Upload ``data`` to the Statface report with scale ``'d'`` and log the result."""
        res = self.stat_report.upload_data('d', data)
        logger.info('Rows uploaded to stat: %s', res)

    def process(self, date):
        """Collect, parse and upload the stats for ``date``."""
        data = self.parse(self.collect(date))
        self.upload(data)

    def _filter_hosts(self, hosts, spec_hosts):
        """Return only the groups (and hosts within them) listed in ``spec_hosts``.

        Groups left without any matching host are dropped entirely.
        """
        filtered = {}
        for group, group_hosts in hosts.items():
            kept = [h for h in group_hosts if h in spec_hosts]
            if kept:
                filtered[group] = kept
        return filtered

    @property
    def hosts(self):
        """Mapping ``{group: [host, ...]}``, fetched from the collector once and cached."""
        # Compare with None (not truthiness) so that an empty result is cached
        # too instead of re-querying the collector on every access.
        if self._hosts is None:
            all_hosts = self.collector.get_all_hosts()
            if self.specific_hosts:
                all_hosts = self._filter_hosts(all_hosts, self.specific_hosts)
            self._hosts = all_hosts
            logger.info('Hosts initialized: %s', self._hosts)
        return self._hosts

    def collect(self, date):
        """Collect stats for every known host and tag each record with its group.

        Collection is best-effort: a failure for one host is logged (with
        traceback) and that host is skipped.

        :param date: passed through to ``collector.collect_stats``.
        :return: list of per-host records, each with an added ``'cgroup'`` key.
        """
        hosts = self.hosts
        total = sum(len(group_hosts) for group_hosts in hosts.values())
        logger.info('Collecting %s hosts per date: %s', self.LOG_NAME, date)
        logger.info('%s groups to be collected', len(hosts))
        logger.info('%s hosts to be collected', total)

        data = []
        position = 0
        for group, group_hosts in hosts.items():
            for host in group_hosts:
                position += 1
                logger.info('Collecting %s stat: %s/%s', host, position, total)
                try:
                    stats = self.collector.collect_stats(host, date)
                    stats['cgroup'] = group
                    data.append(stats)
                except Exception as e:
                    # Keep going with the remaining hosts, but leave a
                    # traceback in the log so the failure is diagnosable.
                    logger.exception('Raised exception %s while handling %s', e, host)
        return data

    def parse(self, data):
        """Turn collected records into a list of dicts ready for upload.

        Keeps ``date`` (renamed to ``fielddate`` and formatted as
        ``YYYY-MM-DD`` from epoch seconds), ``cgroup``, ``fqdn`` and the
        ``calc_columns``; missing metric values become 0.

        :param data: list of records as returned by :meth:`collect`.
        :return: list of dicts; empty when nothing was collected.
        """
        df = pd.DataFrame(data)
        logger.info('%s rows extracted', len(df))
        if df.empty:
            # Nothing collected: selecting the columns below would raise KeyError.
            return []

        tdf = df[['date', 'cgroup', 'fqdn'] + self.calc_columns].copy()
        tdf.rename(columns={'date': 'fielddate'}, inplace=True)
        tdf['fielddate'] = pd.to_datetime(tdf['fielddate'], unit='s').dt.strftime('%Y-%m-%d')
        tdf[self.calc_columns] = tdf[self.calc_columns].fillna(0)
        d_data = tdf.to_dict(orient='records')
        if self.ignore_empty:
            d_data = self._skip_empty(d_data)
        return d_data

    def _skip_empty(self, data):
        """Drop every key whose value is falsy (0, '', None, ...) from each record."""
        return [{k: v for k, v in d.items() if v} for d in data]


class QloudExtractor(YasmExtractor):
    """Extractor for Qloud stats, optionally aggregated over fqdn prefixes."""

    LOG_NAME = 'Qloud'

    def __init__(self, *args, **kwargs):
        super(QloudExtractor, self).__init__(*args, **kwargs)
        # Assigned after the base constructor, which sets ``collector`` to None.
        self.collector = yasmqloudcollector

    def parse(self, data, enrich=True):
        """Turn collected records into a list of dicts ready for upload.

        Same column selection and formatting as the base class, then
        (optionally) aggregation by fqdn prefix, then ``fqdn`` and ``cgroup``
        are swapped.

        :param data: list of collected per-host records.
        :param enrich: when true, replace per-fqdn rows with sums over each
            dot-separated fqdn prefix (see :meth:`_enrich_data`).
        :return: list of dicts; empty when nothing was collected.
        """
        df = pd.DataFrame(data)
        logger.info('%s rows extracted', len(df))
        if df.empty:
            # Nothing collected: selecting the columns below would raise KeyError.
            return []

        tdf = df[['date', 'cgroup', 'fqdn'] + self.calc_columns].copy()
        tdf.rename(columns={'date': 'fielddate'}, inplace=True)
        tdf['fielddate'] = pd.to_datetime(tdf['fielddate'], unit='s').dt.strftime('%Y-%m-%d')
        tdf[self.calc_columns] = tdf[self.calc_columns].fillna(0)
        if enrich:
            # Subgroup every category into groups
            tdf = self._enrich_data(tdf)

        # Swap fqdn and cgroup to make it more similar to hardware metrics.
        # Renaming both labels at once exchanges the two columns' contents
        # without depending on how DataFrame-to-DataFrame assignment aligns.
        tdf = tdf.rename(columns={'fqdn': 'cgroup', 'cgroup': 'fqdn'})
        d_data = tdf.to_dict(orient='records')
        if self.ignore_empty:
            d_data = self._skip_empty(d_data)
        return d_data

    def _enrich_data(self, df, key_col='fqdn', groups_col=('cgroup', 'fielddate')):
        """Sum the value columns over every dot-separated prefix of ``key_col``.

        ``key_col`` is split on ``'.'`` into at most three parts. For each
        prefix depth (1, 2, 3 parts) the rows are grouped by ``groups_col``
        plus that prefix and all remaining columns are summed. The results
        for all depths are concatenated and ``key_col`` is rebuilt as the
        joined prefix.

        :param df: frame containing ``key_col``, ``groups_col`` and value columns.
        :param key_col: name of the dotted column to split.
        :param groups_col: columns that are always part of the grouping key.
        :return: new frame with ``groups_col``, the summed value columns and
            ``key_col``.
        """
        exps = ['p%s' % s for s in range(3)]

        # Work on a real copy so adding columns does not write into a slice of ``df``.
        tdf = df.loc[:, df.columns != key_col].copy()

        # ``n`` must be passed by keyword: newer pandas makes it keyword-only.
        parts = df[key_col].str.split('.', n=len(exps) - 1, expand=True)
        # When no value has all three parts the split yields fewer columns;
        # pad with empty (NaN) columns so every prefix column exists.
        parts = parts.reindex(columns=range(len(exps)))
        for position, name in enumerate(exps):
            tdf[name] = parts[position]

        # Groupby and sum columns by fqdn subgroups up to all of them
        excluded = set(groups_col)
        excluded.add(key_col)
        value_cols = [c for c in df.columns if c not in excluded]
        sub_dfs = []
        for depth in range(1, len(exps) + 1):
            keys = list(chain(groups_col, exps[:depth]))
            sub_dfs.append(tdf.groupby(keys, as_index=False)[value_cols].sum())

        # Concat fqdn subgroups into fqdn, then remove the helper columns
        sdf = pd.concat(sub_dfs)
        sdf.reset_index(drop=True, inplace=True)
        # Shallower depths have no deeper prefix columns; blank them so the
        # join below yields just the prefix once trailing dots are stripped.
        sdf[exps] = sdf[exps].fillna('')
        sdf[key_col] = sdf[exps].apply(lambda r: '.'.join(r.values.astype(str)).strip('.'), axis=1)
        sdf.drop(columns=exps, inplace=True)
        return sdf


class DeployExtractor(YasmExtractor):
    """Extractor for Deploy stats; output has ``fqdn`` and ``cgroup`` swapped."""

    # Was 'Qloud' (copy-paste), which mislabelled Deploy runs in the logs.
    LOG_NAME = 'Deploy'

    def __init__(self, *args, **kwargs):
        super(DeployExtractor, self).__init__(*args, **kwargs)
        # Assigned after the base constructor, which sets ``collector`` to None.
        self.collector = yasmdeploycollector

    def parse(self, data, enrich=False):
        """Turn collected records into a list of dicts ready for upload.

        Same column selection and formatting as the base class, followed by
        swapping ``fqdn`` and ``cgroup``.

        :param data: list of collected per-host records.
        :param enrich: when true, pass the frame through ``self._enrich_data``.
            This class does not define that method, so enrichment only works
            for subclasses that provide one.
        :return: list of dicts; empty when nothing was collected.
        :raises NotImplementedError: if ``enrich`` is true and no
            ``_enrich_data`` method is available.
        """
        df = pd.DataFrame(data)
        logger.info('%s rows extracted', len(df))
        if df.empty:
            # Nothing collected: selecting the columns below would raise KeyError.
            return []

        tdf = df[['date', 'cgroup', 'fqdn'] + self.calc_columns].copy()
        tdf.rename(columns={'date': 'fielddate'}, inplace=True)
        tdf['fielddate'] = pd.to_datetime(tdf['fielddate'], unit='s').dt.strftime('%Y-%m-%d')
        tdf[self.calc_columns] = tdf[self.calc_columns].fillna(0)
        if enrich:
            # Subgroup every category into groups
            enrich_data = getattr(self, '_enrich_data', None)
            if enrich_data is None:
                raise NotImplementedError(
                    '%s does not implement _enrich_data' % type(self).__name__)
            tdf = enrich_data(tdf)

        # Swap fqdn and cgroup to make it more similar to hardware metrics.
        # Renaming both labels at once exchanges the two columns' contents.
        tdf = tdf.rename(columns={'fqdn': 'cgroup', 'cgroup': 'fqdn'})
        d_data = tdf.to_dict(orient='records')
        if self.ignore_empty:
            d_data = self._skip_empty(d_data)
        return d_data


class HwExtractor(YasmExtractor):
    """Extractor bound to the hardware (``yasmhwcollector``) collector."""

    LOG_NAME = 'HW'

    def __init__(self, *args, **kwargs):
        base = super(HwExtractor, self)
        base.__init__(*args, **kwargs)
        # Bind the collector only after the base constructor has run,
        # because that constructor sets ``collector`` to None.
        self.collector = yasmhwcollector


class PGExtractor(YasmExtractor):
    """Extractor bound to the PG (``yasmpgcollector``) collector."""

    LOG_NAME = 'PG'

    def __init__(self, *args, **kwargs):
        # The base constructor already creates ``stat_client`` and
        # ``stat_report`` for the 'Disk/Admin/Resources' report with the same
        # arguments, so they are not rebuilt here.
        super(PGExtractor, self).__init__(*args, **kwargs)
        self.collector = yasmpgcollector


class SharpeiExtractor(PGExtractor):
    """PG-style extractor bound to the Sharpei (``yasmsharpeicollector``) collector."""

    LOG_NAME = 'Sharpei'

    def __init__(self, *args, **kwargs):
        base = super(SharpeiExtractor, self)
        base.__init__(*args, **kwargs)
        # Replace the collector chosen by the parent constructor.
        self.collector = yasmsharpeicollector


class MDBExtractor(YasmExtractor):
    """Extractor bound to the MDB (``yasmmdbcollector``) collector.

    Collection, parsing and upload are inherited unchanged from
    ``YasmExtractor``; the former ``collect`` override was a verbatim copy of
    the base implementation and has been removed.
    """

    LOG_NAME = 'MDB'

    def __init__(self, *args, **kwargs):
        # The base constructor already creates ``stat_client`` and
        # ``stat_report`` for the 'Disk/Admin/Resources' report with the same
        # arguments, so they are not rebuilt here.
        super(MDBExtractor, self).__init__(*args, **kwargs)
        self.collector = yasmmdbcollector
