import pandas as pd
import numpy as np
import time
import pickle
import warnings
import yt.yson as yson
warnings.filterwarnings('ignore')
from collections import defaultdict

from business_models import hahn
from business_models.util.dataframes import convert_dtypes_to_yql
from concurrent.futures import ProcessPoolExecutor


# Bootstrap metric tag -> label written to the ``metrics`` column.
# ``new_check_res`` walks this mapping in insertion order and hands every key
# to ``_check_helper``, which reads the ``aa_<tag>`` / ``ab_<tag>`` entries of
# a row.  Keys containing "relation" are compared against 1 there, all other
# keys against 0.
# NOTE(review): the misspellings ("couries", "delivereies", "delivereis") are
# runtime strings and are reproduced verbatim; the keys presumably mirror
# upstream column names — confirm before renaming anything.
bootstrap_results = {
    "deliveries_with_postcard_penetration": "Postcard penetration uplift",
    "deliveries_with_thermobag_penetration": "Thermobag penetration uplift",
    "deliveries_with_door_to_door_penetration": "D2D penetration uplift",
    "postcard_pen_penetration": "Postcard User penetration uplift",
    "thermobag_pen_penetration": "Thermobag User penetration uplift",
    "d2d_pen_penetration": "D2D User penetration uplift",
    "express_couries_delivereies_relation": "Deliveries uplift (C+E)",
    "cargo_deliveries_relation": "Deliveries in Cargo uplift",
    "is_express_couries_relation": "Users uplift in (C+E)",
    "d2d_relation": "D2D delivereies uplift",
    "thermobag_relation": "Thermobag deliveries uplift",
    "postcard_relation": "Postcard deliveries uplift",
    "is_express_couries_ratio": "Users with delivereis conversion uplift",
    "cargo_us_relation": "Cargo users uplift",
    "d2d_us_relation": "D2D users uplift",
    "thermobag_us_relation": "Thermobag users uplift",
    "postcard_us_relation": "Postcard users uplift",
}


# Row column -> label for counters that ``new_check_res`` renders with a plain
# ``str()`` call (no thousands grouping).
totals_int = {
    "total_users_y": "Total",
    "test_count": "Test",
    "test_skipped": "Test Skipped",
    "control_count": "Control",
    "control_skipped": "Control Skipped",
}

# Row column -> label for totals that ``new_check_res`` formats via ``_pretty``
# (integer with space-separated thousands).
# NOTE(review): the ``_x`` / ``_y`` suffixes look like pandas merge suffixes
# produced by the join in the ``__main__`` section — confirm.
totals_pretty = {
    "delivered": "Total delivered",
    "total_users_y": "Total users in compaign",
    "total_users_x": "Test group total users",
    "test_cargo_deliveries": "Total Cargo Deliveries",
    "test_is_cargo": "Total Cargo users",
    "test_express_couries_delivereies": "Total deliveries (C+E) TG",
    # Disabled: "control_express_couries_delivereies": "Total deliveries (C+E) CG",
    "test_is_express_couries": "Total users (C+E) with deliveries in TG",
    # Disabled: "control_is_express_couries": "Total users (C+E) with deliveries in CG",
}

# Row column -> label for fractions that ``new_check_res`` multiplies by 100,
# rounds to two decimals and suffixes with "%"; NaN values become ``None``.
ratio = {
    "sent_to_delivery": "Delivery rate",
    "sent_to_shown": "Shown Rate",
    "sent_to_dismissed": "Dismissed Rate",
    "sent_to_tapped": "Tapped Rate",
    # Disabled ``fs_*`` metrics:
    # "fs_received": "FS Received",
    # "fs_received_to_closed": "FS Closed Rate",
    # "fs_received_to_seen": "FS Seen Rate",
    # "fs_received_to_selected": "FS Selected Rate",
    "test_door_to_door_to_express": "D2D user penetration",
    "test_door_to_door_delivereies_ratio": "D2D delivereis penetration",
    "test_postcard_to_express_courier": "Postcard user penetration",
    "test_postcard_delivereies_ratio": "Postcard delivereis penetration",
    "test_thermobag_to_express_courier": "Thermobag user penetration",
    "test_thermobag_delivereies_ratio": "Thermobag delivereis penetration",
    "c_e_cr": "C+E convertion",
    "cargo_cr": "Cargo convertion",
}

def _pretty(number):
    return f'{int(number):,}'.replace(',', ' ')


def _check_helper(results, tag):
    aa = results[f'aa_{tag}'][1]
    ab_left = results[f'ab_{tag}'][1]
    ab_right = results[f'ab_{tag}'][5] if len(results[f'ab_{tag}']) > 2 else results[f'ab_{tag}'][1]
    if aa > 0 and 'relation' not in tag:
        return f'Dif AA'
    if aa > 1 and 'relation' in tag:
        return f'Dif AA'
    if ab_left == -1000.0 or ab_left == -2000.0 or ab_left == -3000.0 or ab_left == -5000.0:
        return f'Not enough data'
    if ab_left > 0 and 'relation' not in tag:
        return f'[{round(ab_left, 2)}pp; {round(ab_right, 2)}pp]'
    if ab_left > 1 and 'relation' in tag:
        ab_left = results[f'ab_{tag}'][8]
        ab_right = results[f'ab_{tag}'][12]
        return f'[{_pretty(ab_left)}; {_pretty(ab_right)}]'
    return 'Not significant'


def new_check_res(row):
    """Expand one wide result row into parallel lists of metrics and values.

    Values are produced in four consecutive groups, each following the
    insertion order of its mapping: ``bootstrap_results`` (via
    ``_check_helper``), ``totals_int`` (``str``), ``totals_pretty``
    (``_pretty``) and ``ratio`` (percentage string, or ``None`` for NaN).

    Args:
        row: Mapping-like row containing every column named in those mappings
            plus ``experiment_id``, ``group_name`` and ``window_selector``.

    Returns:
        Dict of equal-length lists keyed ``experiment_id``, ``group_name``,
        ``window_selector``, ``metrics`` and ``values``; the three identifier
        lists repeat the row's own value once per metric.
    """
    values = [_check_helper(row, tag) for tag in bootstrap_results]
    values += [str(row[tag]) for tag in totals_int]
    values += [_pretty(row[tag]) for tag in totals_pretty]
    values += [
        None if np.isnan(row[tag]) else f"{round(row[tag] * 100, 2)}%"
        for tag in ratio
    ]

    metrics = [
        *bootstrap_results.values(),
        *totals_int.values(),
        *totals_pretty.values(),
        *ratio.values(),
    ]

    repeat = len(values)
    return {
        "experiment_id": [row["experiment_id"]] * repeat,
        "group_name": [row["group_name"]] * repeat,
        "window_selector": [row["window_selector"]] * repeat,
        "metrics": metrics,
        "values": values,
    }


# NOTE(review): d1 and d2 are not referenced anywhere else in the visible file;
# they look like leftover scratch values — confirm before deleting.
d1 = {1: 2, 3: 4}
d2 = {1: 6, 3: 7}

# Module-level accumulator: the ``__main__`` section extends dd[key] with the
# lists returned by new_check_res and then builds a DataFrame from it.
dd = defaultdict(list)


# Columns of the merged frame that the ``__main__`` section selects before
# joining the per-metric rows onto them.
main_columns = [
    "experiment_id",
    "creation_time",
    "group_name",
    "campaign_name",
    "country",
    "app_name",
    "window_selector",
]


# Russian-language country name -> two-letter country code.  The ``__main__``
# section looks names up with ``countries.get(x, x)``, so names missing here
# are passed through unchanged.
countries = {
    "Казахстан": "KZ",
    "Россия": "RU",
    "Узбекистан": "UZ",
    "Кот-д Ивуар": "CI",
    "Белоруссия": "BY",
    "Сербия": "RS",
    "Азербайджан": "AZ",
    "Израиль": "IL",
    "Камерун": "CM",
    "Киргизия": "KG",
    "Молдавия": "MD",
    "Грузия": "GE",
    "Армения": "AM",
}


# Metric labels that ``addition_columns`` always colours "5" and never blanks
# out, regardless of the campaign.
health_color = [
    "Delivery rate",
    "Shown Rate",
    "Dismissed Rate",
    "Tapped Rate",
    "Total users in compaign",
    "Test group total users",
    "Total delivered",
]

def get_color(row):
    """Map a rendered metric value to a colour code string.

    Args:
        row: The rendered value (a string or ``None``), not a DataFrame row.

    Returns:
        ``"0"`` for a falsy value or when nothing matches, ``"2"`` when the
        value contains "Not significant", ``"3"`` when it contains
        "Not enough data" or "Dif", and ``"1"`` when it contains "[".
        Rules are tried in that order and the first match wins.
    """
    if not row:
        return "0"

    rules = (
        (("Not significant",), "2"),
        (("Not enough data", "Dif"), "3"),
        (("[",), "1"),
    )
    for needles, code in rules:
        if any(needle in row for needle in needles):
            return code
    return "0"


def get_tag(row):
    """Derive the campaign tag from a campaign name.

    Args:
        row: Campaign name string (``addition_columns`` passes the
            ``campaign_name`` value here, not a DataFrame row).

    Returns:
        The tag belonging to the first marker found in the name, checked in
        the order ``_termo``, ``_cargo``, ``_order``, ``_postcard``,
        ``_survey``; ``None`` when the name contains none of them.
    """
    markers = (
        ("_termo", "thermobag"),
        ("_cargo", "cargo"),
        ("_order", "order"),
        # This tag used to be emitted misspelled as "postcrad"; any consumer
        # that filters on the old spelling has to be updated with this fix.
        ("_postcard", "postcard"),
        ("_survey", "survey"),
    )
    for marker, tag in markers:
        if marker in row:
            return tag
    return None


def addition_columns(row):
    """Compute the final ``values``, ``color`` and ``tag`` cells for one row.

    The value is blanked to ``None`` when the campaign is the literal
    '"не кампания о доставке"' placeholder or is tagged ``survey``, unless the
    metric is listed in ``health_color``.  Metrics listed in ``health_color``
    always get colour ``"5"``; every other metric is coloured by
    ``get_color``.

    Args:
        row: Row exposing ``campaign_name``, ``values`` and ``metrics``.

    Returns:
        ``pd.Series([values, color, tag])``.
    """
    campaign = row['campaign_name']
    tag = get_tag(campaign)
    shown = row['values']
    metric = row['metrics']

    is_health = metric in health_color
    hide_for_campaign = campaign == '"не кампания о доставке"' or tag == "survey"
    if hide_for_campaign and not is_health:
        shown = None

    color = get_color(shown)
    if is_health:
        color = "5"
    return pd.Series([shown, color, tag])


if __name__ == "__main__":
    # Input tables and the destination of the reshaped result.
    bootstrap_path = "//home/taxi-delivery/analytics/dev/marketing/agliukov/crm_exp/after_bootstrap_final"
    health_path = "//home/taxi-delivery/analytics/dev/marketing/agliukov/crm_exp/logs_agg/by_experiment_id_group_final"
    output_path = "//home/taxi-delivery/analytics/dev/marketing/agliukov/crm_exp/bootstrap_results_final_3"

    # Join the bootstrap output with the second input on experiment and group.
    df_2 = hahn.read(full_path=bootstrap_path)
    df_health = hahn.read(full_path=health_path)
    df_2 = df_2.merge(df_health, on=["experiment_id", "group_name"])

    # Each row yields a dict of equal-length lists; concatenate them per key
    # in the module-level accumulator to build the long-format frame.
    for expanded in df_2.apply(new_check_res, axis=1).values:
        for column, cells in expanded.items():
            dd[column].extend(cells)
    long_metrics = pd.DataFrame(dd)

    df_new = df_2[main_columns].merge(
        long_metrics,
        on=["experiment_id", "group_name", "window_selector"],
    )
    df_new["group_name"] = df_new["group_name"].str.upper()
    # Names missing from the lookup are kept unchanged.
    df_new["country"] = df_new["country"].apply(lambda name: countries.get(name, name))
    df_new[["values", "color", "tag"]] = df_new.apply(addition_columns, axis=1)

    hahn.write(
        df_new,
        table_name="bootstrap_results",
        full_path=output_path,
        if_exists="replace",
    )
