# -*- coding: utf-8 -*-
import logging
import pandas as pd

logger = logging.getLogger(__name__)


def prepare_data_df(task):
    """Build the aggregated metrics frame for ``task``.

    Steps, in order: optional row filtering through ``task.data_filters``,
    normalisation of ``Regions``, the metric dtypes, ``BannerType`` and
    ``TargetingType``, grouping by ``task.config['group_by']`` with
    summation, and removal of groups whose numeric columns add up to zero
    or less.

    Args:
        task: object exposing ``df`` (a pandas DataFrame), ``issue.key``,
            ``data_filters``, ``config['group_by']`` (a list of column
            names), ``type_`` and, for ``type_ == 'finance'``,
            ``dyn_filter`` and a list-like ``comment``.

    Returns:
        A DataFrame holding the group-by columns followed by the integer
        and float metric columns (plus ``MoneyDiff`` for ``'retail'``
        tasks), re-indexed from zero.

    Side effects:
        Extends ``task.comment`` when dynamic banners are filtered out of a
        ``'finance'`` task.  ``task.df`` itself is never modified.
    """
    issue_key = task.issue.key
    logger.debug('%s: prepare data_df', issue_key)
    df = task.df
    logger.debug('%s: %s rows in data_df', issue_key, df.shape[0])

    if task.data_filters:
        df = apply_data_filters(df, task.data_filters)

    # Work on a private copy: the column assignments below must neither leak
    # into the caller's ``task.df`` nor write into a boolean-index slice
    # (which triggers pandas' SettingWithCopyWarning).
    df = df.copy()

    group_cols = task.config['group_by']
    int_cols = ['DirectClicks', 'Visits', 'GoalVisits', 'GoalReaches', 'EcommerceOrderVisits', 'MergedGoalVisits',
                'Bounces', 'PageViews', 'Duration']
    float_cols = ['DirectCost', 'OrderedRevenue']

    df['Regions'] = df['Regions'].apply(parse_region)
    # Dtype strings instead of the ``pd.np`` alias, which newer pandas
    # releases no longer provide.
    for col in int_cols:
        df[col] = df[col].fillna(0).astype('int64')
    for col in float_cols:
        df[col] = df[col].fillna(0).astype('float64')

    if task.type_ == 'standart':
        df['BannerType'] = df['BannerType'].replace({u'Видеообъявления': u'Текстово-графические',
                                                     u'Видеодополнения': u'Текстово-графические'})
    elif task.type_ == 'finance':
        df['BannerType'] = df['BannerType'].replace({u'Видеообъявления': u'Видеообъявления и видеодополнения',
                                                     u'Видеодополнения': u'Видеообъявления и видеодополнения'})
        if not task.dyn_filter:
            df['BannerType'] = df['BannerType'].replace({u'Динамические': u'Текстово-графические'})
        else:
            dynamic_rows = df['BannerType'] == u'Динамические'
            if dynamic_rows.any():
                task.comment += [u'!!(grey)Из выгрузки отфильтрованы динамические объявления.!!', ' ']
                df = df[~dynamic_rows].copy()

    df['TargetingType'] = df['TargetingType'].replace({u'Фразы (синоним)': u'Фразы (прочее)'})

    # ``numeric_only=True`` spells out what older pandas did implicitly
    # (non-numeric columns are left out of both sums); newer releases would
    # otherwise try to add strings to numbers.
    df = df.fillna('').groupby(group_cols, as_index=False).sum(numeric_only=True)
    # NOTE(review): the row total spans every numeric column that survives
    # the group-by, including numeric group keys -- confirm this is intended.
    row_totals = df.sum(axis=1, numeric_only=True)
    df = df[row_totals > 0].copy()

    if task.type_ == 'retail':
        df['MoneyDiff'] = df['OrderedRevenue'] - df['DirectCost']
        float_cols.append('MoneyDiff')

    logger.info('%s: %s rows in data_df', issue_key, df.shape[0])
    return df.loc[:, group_cols + int_cols + float_cols].reset_index(drop=True)


def apply_data_filters(df, filters):
    """Keep only the rows whose columns match the allowed values.

    Args:
        df: pandas DataFrame to filter; it is not modified.
        filters: mapping of column name -> collection of allowed values.
            Entries with an empty (falsy) collection are skipped.

    Returns:
        The filtered DataFrame.  When no filter is applied the very same
        ``df`` object is returned.

    Raises:
        KeyError: if a filtered column is missing from ``df``.
    """
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is 2-only.
    for col_name, values in filters.items():
        if not values:
            continue
        # Compute the membership mask once and reuse it for both the log
        # message and the actual filtering.
        keep = df[col_name].isin(values)
        logger.debug(u'filter by {col_name} == {values}: -{len:,.0f}'.format(
            col_name=col_name,
            # Stringify each value so numeric filters cannot break logging.
            values=u', '.join(u'{}'.format(value) for value in values),
            len=(df.shape[0] - int(keep.sum())))
        )
        df = df[keep]

    return df


def parse_region(region_ids):
    """Return ``region_ids`` as text, written as an integer when possible.

    A value that ``int()`` accepts (e.g. ``213.0`` or ``'42'``) is rendered
    as that integer; anything else (NaN, ``None``, ``'225,1'``, ...) is
    rendered through its plain text form.

    Args:
        region_ids: a single cell value of any type.

    Returns:
        A text string (``unicode`` on Python 2, ``str`` on Python 3).
    """
    try:
        as_int = int(region_ids)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / containers; ValueError: NaN or non-numeric text;
        # OverflowError: infinite floats.  Other exceptions must propagate.
        return u'{}'.format(region_ids)
    return u'{}'.format(as_int)
