# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from itertools import product
import logging
import pandas as pd
from random import getrandbits
from yql.client.operation import YqlOperationType
from make_data import DICTS
from make_data.yql_worker import read_table, YQLWorker
from make_queries import (
    clickhouse_request,
    get_campaigns_table,
    get_domains_condition,
    get_domains_condition_for_chyt,
    get_goals_condition,
    get_query_txt,
    get_regions_condition,
    get_regions_condition_for_chyt,
    get_visits_select,
    ATTR_PREFIX_TO_ID,
)

logger = logging.getLogger(__name__)


def load_cpa_data(cpa_task):
    """Load and combine visits, clicks and costs for every (region, client) pair.

    Iterates over the Cartesian product of ``cpa_task.regions`` and the rows of
    ``cpa_task.task_df``. For each pair it calls ``load_visits``,
    ``load_clicks`` and ``load_costs``, merges the three frames and appends the
    outcome to the running totals. The loop stops early as soon as
    ``cpa_task.break_task()`` returns a truthy value.

    :param cpa_task: task object; this function reads ``regions``, ``task_df``,
        ``issue.key``, ``period``, ``cur_coef``, ``vat``, ``replace_categories``
        and calls ``break_task()``.
    :return: tuple ``(data_included, result_df)``. ``data_included`` holds one
        row per (client row, region) pair that produced data; ``result_df``
        holds the grouped metrics. When nothing was loaded both frames are
        returned as accumulated (empty), without any post-processing.
    """
    data_included, result_df = pd.DataFrame(), pd.DataFrame()

    for r_num, (region, (idx, client)) in enumerate(product(cpa_task.regions, cpa_task.task_df.iterrows()), 1):
        # The row yielded by iterrows() is tagged with the current region so
        # the loaders below can read it as client['Regions'].
        client['Regions'] = region

        logger.info(u'{}: load visits {}, {} - {}, geo - {}'.format(
            cpa_task.issue.key, client['Client'], cpa_task.period['first_date'], cpa_task.period['last_date'], region))
        visits_df = load_visits(cpa_task, client)
        logger.info(u'{}: load clicks {}, {} - {}, geo - {}'.format(
            cpa_task.issue.key, client['Client'], cpa_task.period['first_date'], cpa_task.period['last_date'], region))
        clicks_df = load_clicks(cpa_task, client)
        costs_df = load_costs(cpa_task, client)

        if not clicks_df.empty and not costs_df.empty:
            logger.debug('{}: join clicks {}x{} and costs {}x{}'.format(
                cpa_task.issue.key, clicks_df.shape[0], clicks_df.shape[1], costs_df.shape[0], costs_df.shape[1]))
            # Outer join on the index; columns of costs_df that clash with
            # clicks_df get the '_chyt' suffix.
            clicks_df = clicks_df.join(costs_df, how='outer', lsuffix='', rsuffix='_chyt')
            # Total cost is the sum of both sources, a missing side counts as 0.
            clicks_df['DirectCost'] = clicks_df['DirectCost'].fillna(0.) + clicks_df['DirectCost_chyt'].fillna(0.)
        else:
            # At most one of the two frames has rows: keep whichever it is.
            clicks_df = clicks_df if not clicks_df.empty else costs_df

        if not visits_df.empty and not clicks_df.empty:
            cur_result = visits_df.join(clicks_df, how='outer')
        else:
            cur_result = visits_df if not visits_df.empty else clicks_df

        if not cur_result.empty:
            # Record the client row (as a one-row frame) only when it produced data.
            data_included = pd.concat([data_included, pd.DataFrame(client).T])
            result_df = pd.concat([result_df, cur_result])

        else:
            logger.info(u'{}: empty client - {}, geo - {}'.format(
                cpa_task.issue.key, client['Client'], region))

        logger.info('{}: {} rows processed of {}'.format(
            cpa_task.issue.key, r_num, len(cpa_task.regions) * len(cpa_task.task_df.index)))

        if cpa_task.break_task():
            break

    # Nothing was loaded: skip post-processing, which needs real columns.
    if data_included.empty:
        return data_included, result_df

    # Keep only the descriptive client columns and rename them via DICTS['column_names'].
    data_included = (data_included
                     .reset_index(drop=True)
                     .loc[:, ['Client', 'Category', 'CounterID', 'GoalID', 'ClientID', 'CampaignID', 'Regions']]
                     .rename(columns=DICTS['column_names'])
                     .fillna('')
                     )

    # Turn the index levels back into ordinary columns before value replacement.
    result_df = result_df.reset_index(drop=False)
    result_df = result_df.replace(to_replace=DICTS['replace_names'])

    result_df['DirectCost'] = result_df['DirectCost'] * cpa_task.cur_coef
    if cpa_task.vat:
        # Date-dependent multiplier: 1.18 for rows dated before 2019-01-01,
        # 1.2 from that date on. 'Date' is parsed as a '%Y-%m-%d' string.
        result_df['DirectCost'] = result_df.apply(
            lambda row: row['DirectCost'] * 1.18 if datetime.strptime(row['Date'], '%Y-%m-%d') < datetime(2019, 1, 1)
            else row['DirectCost'] * 1.2, axis=1)

    # An empty region value is shown with a fixed placeholder label.
    result_df['Regions'] = result_df['Regions'].replace(to_replace='', value=u'Весь мир')
    if cpa_task.replace_categories:
        result_df['Category'] = result_df['Category'].replace(to_replace=DICTS['Category'])

    else:
        # An empty category value is shown with a fixed placeholder label.
        result_df['Category'] = result_df['Category'].replace(to_replace='', value=u'Без учёта категоризации')

    # Output columns, in order; anything not listed here (for example the
    # helper column 'DirectCost_chyt') is dropped by the .loc selection below.
    cols = [
        'Date',
        'Client',
        'Category',
        'Regions',
        'Place',
        'Device',
        'BannerType',
        'TargetingType',
        'DirectClicks',
        'DirectCost',
        'Visits',
        'GoalVisits',
        'GoalReaches',
        'EcommerceOrderVisits',
        'MergedGoalVisits',
        'OrderedRevenue',

        'Bounces',
        'PageViews',
        'Duration',

        'FirstTimeCalls',
        'SecondTimeCalls',
        'MissedCalls',
        'HoldDuration',
        'TalkDuration'
    ]
    # Sum the metrics per DICTS['groupby'] key, then select and rename the columns.
    result_df = (result_df
                 .groupby(DICTS['groupby'], as_index=False).sum()
                 .loc[:, cols]
                 .rename(columns=DICTS['column_names'])
                 .fillna('')
                 )

    return data_included, result_df


def load_costs(cpa_task, client):
    """Run the ``conversion_costs`` query for one client and return prepared costs.

    The query text comes from ``get_query_txt('conversion_costs')`` and is
    executed through ``YQLWorker`` with the CLICKHOUSE operation type. The
    result is then read back with ``read_table`` from a temporary table whose
    path is passed to the query as ``table_path`` (presumably the query writes
    its output there - confirm against the query template).

    :param cpa_task: task object; reads ``period``, ``issue.key``,
        ``domain_filters``, ``rules``, ``date_func`` and ``test_mode``.
    :param client: row describing the client; reads ``CampaignID``,
        ``ClientID``, ``DomainID``, ``Regions`` and ``Client``.
    :return: the frame produced by ``prepare_result_df`` for the loaded costs.
    :raises ValueError: if the client has neither ``CampaignID`` nor
        ``ClientID``, so no client filter can be built.
    """
    # Validate the client filter before doing any work. Previously a client
    # without both identifiers left the condition variables unbound and the
    # function died later with an opaque UnboundLocalError.
    if client['CampaignID']:
        clids_condition = ''
        cids_condition = 'AND directcampaignid IN ({})'.format(client['CampaignID'])
    elif client['ClientID']:
        cids_condition = ''
        clids_condition = 'AND clientid IN ({})'.format(client['ClientID'])
    else:
        # {!r} keeps the message ASCII-safe for non-ASCII client names.
        raise ValueError('{}: client {!r} has neither CampaignID nor ClientID, cannot filter costs'.format(
            cpa_task.issue.key, client['Client']))

    first_date = cpa_task.period['first_date']
    last_date = cpa_task.period['last_date']
    # A random 128-bit suffix keeps concurrent runs from sharing a table path.
    table_path = '//tmp/{issue_key}_cost_{from_date}_{to_date}_{hash}'.format(
        issue_key=cpa_task.issue.key, from_date=first_date, to_date=last_date, hash=getrandbits(128))

    if cpa_task.domain_filters:
        domains_condition = get_domains_condition_for_chyt(client['DomainID'])
    else:
        domains_condition = ''

    query_name = 'conversion_costs'
    query = get_query_txt(query_name).format(
        table_path=table_path,
        campaigns_table=get_campaigns_table() if cpa_task.rules else '',
        date_func=cpa_task.date_func,
        from_date=first_date, to_date=last_date,
        # Dates are ISO 'YYYY-MM-DD' strings, so max() on strings orders them
        # chronologically; both bounds are clamped to be no earlier than 2020-03-25.
        from_date_max=max(first_date, '2020-03-25'),
        to_date_next=max(
            '2020-03-25',
            (datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=31 * 2)).strftime('%Y-%m-%d')
        ),
        campaign_field='CampaignName' if cpa_task.rules else '\'\'',
        clids_condition=clids_condition,
        cids_condition=cids_condition,
        domains_condition=domains_condition,
        regions_condition=get_regions_condition_for_chyt(client['Regions'])
    ).encode('utf-8')

    if cpa_task.test_mode:
        # Dump the last generated query next to the process for inspection.
        with open('cur_%s_query.txt' % query_name, 'w') as fd:
            fd.write(query)

    logger.debug('{}: {} in {} including - {} excluding'.format(
        cpa_task.issue.key, query_name, first_date, last_date))
    task_worker = YQLWorker(cpa_task, query, result_names=[], operation_type=YqlOperationType.CLICKHOUSE)
    task_worker.run()
    result_df = read_table(table_path, DICTS['costs'])

    return prepare_result_df(cpa_task, client, result_df)


def load_visits(cpa_task, client):
    """Load visit metrics (and, optionally, call metrics) for one client.

    Runs the ``visits_all`` query through ``run_queries``. When
    ``cpa_task.calls`` is set it also runs the ``calls`` query in 60-day
    windows and outer-joins both results on the shared dimension columns.
    When ``cpa_task.rules`` is set, campaign names are resolved with
    ``load_campaign_names`` before the frame goes to ``prepare_result_df``.

    :param cpa_task: task object; reads ``period``, ``separate_data``,
        ``attr_prefix``, ``date_func``, ``rules``, ``calls`` and ``issue.key``.
    :param client: row describing the client; reads ``CampaignID``,
        ``ClientID``, ``CounterID``, ``GoalID``, ``Regions`` and ``Client``.
    :return: the frame produced by ``prepare_result_df``.
    :raises ValueError: if ``separate_data`` is off and the client has neither
        ``CampaignID`` nor ``ClientID``.
    """
    first_date = datetime.strptime(cpa_task.period['first_date'], '%Y-%m-%d')
    last_date = datetime.strptime(cpa_task.period['last_date'], '%Y-%m-%d')

    if cpa_task.separate_data:
        # In this mode visits are not filtered by client or campaign.
        clids_condition, cids_condition = '', ''
    elif client['CampaignID']:
        clids_condition = ''
        cids_condition = 'AND {attr_prefix}ClickDirectCampaignID IN ({filters})'.format(
            attr_prefix=cpa_task.attr_prefix, filters=client['CampaignID'])
    elif client['ClientID']:
        cids_condition = ''
        clids_condition = 'AND {attr_prefix}ClickClientID IN ({filters})'.format(
            attr_prefix=cpa_task.attr_prefix, filters=client['ClientID'])
    else:
        # Previously both variables stayed unbound here and the function
        # failed later with an opaque UnboundLocalError.
        raise ValueError('{}: client {!r} has neither CampaignID nor ClientID, cannot filter visits'.format(
            cpa_task.issue.key, client['Client']))

    visits_select = get_visits_select(cpa_task.separate_data, cpa_task.attr_prefix)

    # Template arguments shared by the visits and the calls queries.
    common_kwargs = dict(
        date_func=cpa_task.date_func,
        attr_prefix=cpa_task.attr_prefix,

        first_date=first_date, last_date=last_date,

        campaign_field='{}ClickOrderID'.format(cpa_task.attr_prefix) if cpa_task.rules else '\'\'',

        counters=client['CounterID'],
        goals=client['GoalID'] if client['GoalID'] else '0',
        goals_condition=get_goals_condition(client['GoalID']),
        clids_condition=clids_condition,
        cids_condition=cids_condition,
        regions_condition=get_regions_condition(client['Regions'], attr_prefix=cpa_task.attr_prefix)
    )

    result_df = run_queries(
        cpa_task, client, 'visits_all',
        visits=visits_select['visits'],
        bounces=visits_select['bounces'], views=visits_select['views'], duration=visits_select['duration'],
        **common_kwargs
    )

    if cpa_task.calls:
        calls_df = run_queries(cpa_task, client, 'calls', days=60, **common_kwargs)

        # Index the frames only when a join will really happen. Previously the
        # visits frame was indexed even when there were no calls and was never
        # reset, so 'CampaignName', 'BannerType' etc. were no longer columns
        # for the code below (load_campaign_names, prepare_result_df).
        # NOTE(review): when visits are empty but calls are not, the calls are
        # dropped, exactly as before - confirm this is intended.
        if not result_df.empty and not calls_df.empty:
            join_keys = ['Date', 'CampaignName', 'Place', 'Device', 'BannerType', 'TargetingType']
            result_df = (result_df.set_index(join_keys)
                         .join(calls_df.set_index(join_keys), how='outer')
                         .fillna(0)
                         .reset_index())

    if cpa_task.rules and not result_df.empty:
        result_df = load_campaign_names(cpa_task, result_df)

    return prepare_result_df(cpa_task, client, result_df)


def load_campaign_names(cpa_task, result_df):
    """Replace the identifiers in ``result_df['CampaignName']`` with campaign names.

    The unique values of the ``CampaignName`` column are substituted into the
    ``campaign_names`` query as ``cids``. The query is run through
    ``YQLWorker`` and the table at ``table_path`` is read back and keyed by
    ``OrderID``. Each cell is then looked up in that mapping; a value missing
    from the mapping raises ``KeyError``.

    :param cpa_task: task object; reads ``issue.key`` and ``test_mode``.
    :param result_df: frame with a ``CampaignName`` column; modified in place.
    :return: the same ``result_df`` with ``CampaignName`` rewritten.
    """
    query_name = 'campaign_names'
    table_path = '//tmp/{issue_key}_campaigns_{hash}'.format(issue_key=cpa_task.issue.key, hash=getrandbits(128))

    template = get_query_txt(query_name)
    order_ids = result_df['CampaignName'].unique().tolist()
    cids = u', '.join(str(order_id) for order_id in order_ids)
    query = template.format(table_path=table_path, cids=cids).encode('utf-8')

    if cpa_task.test_mode:
        # Keep a copy of the generated query on disk for inspection.
        with open('cur_%s_query.txt' % query_name, 'w') as fd:
            fd.write(query)

    logger.debug('{}: {}'.format(cpa_task.issue.key, query_name))
    YQLWorker(cpa_task, query, result_names=[], operation_type=YqlOperationType.CLICKHOUSE).run()

    # {OrderID: {column: value}} for every row of the campaigns table.
    campaigns_by_order = read_table(table_path, DICTS['campaigns']).set_index('OrderID').to_dict('index')

    def _campaign_name(order_id):
        return campaigns_by_order[order_id]['CampaignName']

    result_df['CampaignName'] = result_df['CampaignName'].apply(_campaign_name)

    return result_df


def load_clicks(cpa_task, client):
    """Load click metrics for one client from the ``click_storage`` query.

    Builds the campaign/client, domain and region conditions, runs the query
    through ``run_queries`` and, when ``cpa_task.rules`` is set, resolves
    campaign names with ``load_campaign_names`` before handing the frame to
    ``prepare_result_df``.

    :param cpa_task: task object; reads ``period``, ``domain_filters``,
        ``date_func``, ``attr_prefix``, ``rules`` and ``issue.key``.
    :param client: row describing the client; reads ``CampaignID``,
        ``ClientID``, ``Client``, ``DomainID`` and ``Regions``.
    :return: the frame produced by ``prepare_result_df``.
    :raises ValueError: if the client has neither ``CampaignID`` nor
        ``ClientID``, so no client filter can be built.
    """
    first_date = datetime.strptime(cpa_task.period['first_date'], '%Y-%m-%d')
    last_date = datetime.strptime(cpa_task.period['last_date'], '%Y-%m-%d')

    if client['CampaignID']:
        clids_condition = ''
        cids_condition = 'AND DirectCampaignID IN ({})'.format(client['CampaignID'])
    elif client['ClientID']:
        cids_condition = ''
        clids_condition = 'AND ClientID IN ({})'.format(client['ClientID'])
    else:
        # Previously both variables stayed unbound here and the function
        # failed later with an opaque UnboundLocalError.
        raise ValueError('{}: client {!r} has neither CampaignID nor ClientID, cannot filter clicks'.format(
            cpa_task.issue.key, client['Client']))

    domains_condition = get_domains_condition(client['Client'], client['DomainID']) if cpa_task.domain_filters else ''

    result_df = run_queries(
        cpa_task, client, 'click_storage',

        date_func=cpa_task.date_func,
        attr_prefix=cpa_task.attr_prefix,

        first_date=first_date, last_date=last_date,

        campaign_field='OrderID' if cpa_task.rules else '\'\'',
        clids_condition=clids_condition,
        cids_condition=cids_condition,
        domains_condition=domains_condition,
        regions_condition=get_regions_condition(client['Regions'])
    )

    if cpa_task.rules and not result_df.empty:
        result_df = load_campaign_names(cpa_task, result_df)

    return prepare_result_df(cpa_task, client, result_df)


def _iter_query_windows(first_date, last_date, days):
    """Yield consecutive ``(from_date, to_date)`` pairs covering the period.

    Each window is at most ``days`` long and its end is capped at
    ``last_date``; the end of one window is the start of the next. Nothing is
    yielded when ``first_date`` is later than ``last_date``.
    """
    window_start = first_date
    while window_start <= last_date:
        window_end = min(last_date, window_start + timedelta(days=days))
        yield window_start, window_end
        if window_end == last_date:
            return
        window_start = window_end


def run_queries(cpa_task, client, query_name, days=30, **kwargs):
    """Run the ``query_name`` template over the period in windows of ``days`` days.

    ``first_date`` and ``last_date`` are taken from ``kwargs``; ``attr_ID`` is
    added to ``kwargs`` by looking ``attr_prefix`` up in ``ATTR_PREFIX_TO_ID``.
    For every window the template is formatted with ``from_date``/``to_date``
    (as ``%Y-%m-%d`` strings) plus ``kwargs``, sent through
    ``clickhouse_request`` and every non-``None`` result is appended to the
    output.

    :param cpa_task: task object; reads ``test_mode`` and ``issue.key``.
    :param client: not used by this function.
    :param query_name: name of the query template and key into ``DICTS``.
    :param days: maximum window length in days.
    :return: concatenation of all window results; an empty frame if none.
    """
    period_start, period_end = kwargs.get('first_date'), kwargs.get('last_date')
    kwargs['attr_ID'] = ATTR_PREFIX_TO_ID[kwargs.get('attr_prefix')]
    collected_df = pd.DataFrame()
    template = get_query_txt(query_name)
    date_format = '%Y-%m-%d'

    for window_start, window_end in _iter_query_windows(period_start, period_end, days):
        query = template.format(
            from_date=window_start.strftime(date_format),
            to_date=window_end.strftime(date_format),
            **kwargs
        ).encode('utf-8')

        if cpa_task.test_mode:
            # Each window overwrites the same dump file, so it holds the latest query.
            with open('cur_%s_query.txt' % query_name, 'w') as fd:
                fd.write(query)

        logger.debug('{}: {} in {:%Y-%m-%d} including - {:%Y-%m-%d} excluding'.format(
            cpa_task.issue.key, query_name, window_start, window_end))

        window_df = clickhouse_request(cpa_task, query, DICTS[query_name])
        if window_df is None:
            continue
        collected_df = pd.concat([collected_df, window_df])

    return collected_df


def prepare_result_df(cpa_task, client, df):
    """Tag a loaded frame with client attributes and aggregate it.

    Adds the ``Client``, ``Regions`` and ``Category`` columns to ``df`` in
    place, casts metric columns with ``df_astype`` and sums them per
    ``DICTS['groupby']`` key. Rows whose summed values are all zero are then
    dropped.

    :param cpa_task: task object; reads ``rules`` and ``replace_categories``.
    :param client: row describing the client; reads ``Client``, ``Regions``
        and ``Category``.
    :param df: frame returned by one of the loaders; modified in place.
    :return: ``df`` itself when it is empty, otherwise a new aggregated frame
        indexed by the ``DICTS['groupby']`` keys.
    """
    if df.empty:
        return df

    # Scalars are broadcast to every row.
    df['Client'] = client['Client']
    df['Regions'] = client['Regions']

    if cpa_task.rules:
        # The displayed client name is the part before the first '__'; the
        # rules themselves are looked up by the full, unsplit client name.
        df['Client'] = df['Client'].apply(lambda cell: cell.split('__')[0])
        client_rules = cpa_task.rules.get(client['Client'])
        df['Category'] = df.apply(lambda row: apply_rules(client_rules, row), axis=1)
    else:
        df['Category'] = client['Category']

    if cpa_task.replace_categories:
        df['Category'] = df.apply(get_category, axis=1)

    df = (df_astype(df)
          .groupby(DICTS['groupby'], as_index=True)
          .sum()
          )
    # Drop rows where every value is zero: zeros become NaN, all-NaN rows are
    # removed, then the remaining NaNs are turned back into zeros.
    # float('nan') replaces pd.np.nan, which relied on the `pandas.np` alias
    # (deprecated in pandas 1.0, removed in 2.0).
    df = df.replace(to_replace=0, value=float('nan')).dropna(axis='index', how='all').fillna(0)

    return df


def df_astype(df):
    """Cast known metric columns of ``df`` to numeric dtypes, in place.

    Empty strings in the integer metric columns are replaced with ``0`` and
    the column is cast to ``int``; empty strings in the float metric columns
    are replaced with ``0.0`` and the column is cast to ``float``. All other
    columns are left untouched.

    :param df: frame to convert; its columns are reassigned in place.
    :return: the same ``df`` object.
    """
    int_columns = ('Visits', 'GoalVisits', 'GoalReaches', 'EcommerceOrderVisits', 'MergedGoalVisits',
                   'Bounces', 'PageViews', 'DirectClicks')
    float_columns = ('OrderedRevenue', 'Duration', 'DirectCost')

    for col_name in df.columns:
        # The builtins int/float are exactly what pd.np.int / pd.np.float
        # aliased; the `pandas.np` alias and the NumPy aliases have since been
        # removed, so the builtins work on both old and new versions.
        if col_name in int_columns:
            df[col_name] = df[col_name].replace(to_replace='', value=0).astype(int)

        elif col_name in float_columns:
            df[col_name] = df[col_name].replace(to_replace='', value=0.0).astype(float)

    return df


def apply_rules(rules, row):
    """Pick a category for ``row`` by matching its campaign name against rules.

    The campaign name is lower-cased and each rule is tried in order. A rule
    fits when every entry of its ``words`` is a substring of the name and no
    entry of its ``minus-words`` is. A rule that lacks either list is treated
    as having an empty one. Rule words are compared as given, without
    lower-casing.

    :param rules: mapping with a ``rules`` list and a ``default`` category, or
        a falsy value when the client has no rules.
    :param row: mapping-like object with a ``CampaignName`` entry.
    :return: ``category`` of the first fitting rule, ``rules['default']`` when
        none fits, or ``'other'`` when ``rules`` is falsy.
    """
    if not rules:
        return 'other'

    campaign_name = row['CampaignName']
    try:
        # Byte strings are decoded; values that are already text (no
        # ``decode`` method, or failing the implicit Python 2 ASCII round
        # trip) are kept as they are.
        campaign_name = campaign_name.decode('utf-8')
    except (AttributeError, UnicodeError):
        pass
    campaign_name = campaign_name.lower()

    for rule in rules.get('rules'):
        words = rule.get('words') or ()
        minus_words = rule.get('minus-words') or ()
        has_all_words = all(word in campaign_name for word in words)
        # `and` short-circuits, so minus-words are only scanned when the
        # positive words already matched.
        if has_all_words and not any(minus_word in campaign_name for minus_word in minus_words):
            return rule.get('category')

    return rules['default']


def get_category(row):
    """Return the category override for ``row``.

    The banner type wins when it is one of ``dynamic_banner``,
    ``search_banner`` or ``smart_banner``; otherwise a ``retargeting``
    targeting type wins; otherwise the row's own ``Category`` is returned.
    """
    banner_type = row['BannerType']
    if banner_type in ('dynamic_banner', 'search_banner', 'smart_banner'):
        return banner_type

    targeting_type = row['TargetingType']
    if targeting_type == 'retargeting':
        return targeting_type

    return row['Category']
