from nile.api.v1 import (
    aggregators as na,
    filters as nf,
    extractors as ne,
    grouping as ng,
    clusters,
    files,
    statface,
    Record,
    Template,
    Path
)
from qb2.api.v1 import filters as sf
from qb2.api.v1 import QB2, extractors as se
import pandas as pd
import datetime
import numpy as np
import re
from functools import partial
import ast
import itertools
# Shared cluster handle used by every table read and write in this script.
# NOTE(review): the access token below is hard-coded in the source file. Treat
# it as exposed: rotate it and load it from the environment or a secrets store.
cluster = clusters.Hahn(pool='mobile-research', token='a6575e1e15b0475fb8d8564beaa60f23').env(
    templates=dict(
        # Presumably the value substituted for `$job_root` in the table paths
        # used throughout this file -- confirm against the Nile documentation.
        job_root='home/turkey-analytics/ktereshin'
    )
)
def load_all_bad_campaign_info_from_hahn(table_path_list):
    """Read every listed cluster table into a DataFrame.

    Each frame is stored under the fifth ``/``-separated component of its path
    (index 4), so every path must contain at least five components.

    :param table_path_list: iterable of table path strings.
    :return: dict mapping that path component to the table's DataFrame.
    """
    job = cluster.job()
    return {
        table_path.split('/')[4]: job.table(table_path).read().as_dataframe()
        for table_path in table_path_list
    }
def load_all_installs_by_campaign(path):
    """Read the cluster table at *path* and return it as a DataFrame.

    :param path: table path string understood by the module-level ``cluster``.
    :return: the table contents as a DataFrame.
    """
    installs_table = cluster.job().table(path)
    return installs_table.read().as_dataframe()

def merge_all(df, dict_, columns_list, how):
    """Merge every DataFrame stored in *dict_* into *df*, one after another.

    :param df: base DataFrame the other frames are joined onto.
    :param dict_: mapping whose values are DataFrames; the keys are ignored.
    :param columns_list: column name(s) to join on (forwarded as ``on``).
    :param how: join type (forwarded as ``how``).
    :return: the merged DataFrame.

    Frames are merged in the mapping's iteration order, which also determines
    the column order of the result.
    """
    merged = df
    # ``values()`` exists on both Python 2 and Python 3, unlike ``iteritems()``,
    # and the keys were never used.
    for frame in dict_.values():
        merged = pd.merge(merged, frame, on=columns_list, how=how)
    return merged
def make_df_with_anomaly_index(df, columns_to_delete, columns_not_renamed):
    """Suffix flag columns with ``_anomaly`` and rank rows by their flag sum.

    :param df: input DataFrame.
    :param columns_to_delete: column names (as they read *after* renaming)
        to leave out of the scored frame.
    :param columns_not_renamed: column names that keep their original name.
    :return: ``(res, df)`` where ``df`` is the renamed input and ``res`` is
        the renamed frame without the deleted columns, with missing values
        replaced by 0, an ``anomaly_index`` column holding the row-wise sum
        of every column whose name contains ``_anomaly``, sorted by that
        index in descending order.
    """
    def _with_suffix(name):
        # Names that are protected or already carry the marker stay as they are.
        if name in columns_not_renamed or '_anomaly' in name:
            return name
        return name + '_anomaly'

    df = df.rename(columns=_with_suffix)
    kept = [name for name in df.columns if name not in columns_to_delete]
    res = df[kept].fillna(0)
    flag_columns = [name for name in res.columns if '_anomaly' in name]
    res['anomaly_index'] = res[flag_columns].sum(axis=1)
    res = res.sort_values(by='anomaly_index', ascending=False)
    return res, df
def filter_good_reason(_dict):
    """Tell whether a campaign row should be reported as anomalous.

    A row is rejected outright when its campaign is ``'preinstall'``, its
    media source is ``'tracking_validation'`` or its media source contains
    ``'oem'``. Otherwise it is accepted when at least one of the
    ``bad_site_share_anomaly``, ``af_searches_anomaly`` or
    ``new_users_share_anomaly`` flags equals 1.

    :param _dict: mapping with the keys named above plus ``campaign`` and
        ``media_source``.
    :return: ``True`` or ``False`` (never ``None``).
    """
    if (_dict['campaign'] == 'preinstall'
            or _dict['media_source'] == 'tracking_validation'
            or 'oem' in _dict['media_source']):
        return False
    # Previously a non-excluded row without any flag fell through and returned
    # None; an explicit bool keeps the result type consistent.
    return bool(
        _dict['bad_site_share_anomaly'] == 1
        or _dict['af_searches_anomaly'] == 1
        or _dict['new_users_share_anomaly'] == 1
    )
def make_records(df):
    """Convert the rows of *df* that pass ``filter_good_reason`` into Records.

    :param df: DataFrame whose rows carry the keys ``filter_good_reason`` reads.
    :return: list of ``Record`` objects, one per accepted row, in row order.
    """
    # ``range`` works on Python 2 and 3 alike (``xrange`` is Python-2-only).
    row_dicts = (df.iloc[pos, :].to_dict() for pos in range(df.shape[0]))
    return [Record(**row) for row in row_dicts if filter_good_reason(row)]
def upload_data_on_hahn(path, result_list):
    """Write *result_list* to the cluster table at *path*.

    Thin wrapper around ``cluster.write`` on the module-level cluster handle.

    :param path: destination table path.
    :param result_list: records to write (e.g. the output of ``make_records``).
    """
    cluster.write(path, result_list)
def write_file_with_anomaly_campaing(df, app_name):
    """Write a plain-text anomaly report for one application.

    The report is written to ``<app_name>.txt`` in the current working
    directory, with spaces in *app_name* replaced by underscores. Every row of
    *df* produces one section terminated by a line of ``=`` characters.

    :param df: DataFrame with the columns ``app_name``, ``media_source``,
        ``campaign``, ``installs``, ``anomaly_index``, ``anomaly_values_dict``,
        ``share``, ``fraud_search_share``, ``af_searches_anomaly``,
        ``bad_site_share_anomaly`` and, for rows where the corresponding flag
        is 1, ``cluster_dist_value`` / ``installs_visited_bad_site_share``.
    :param app_name: application name used to build the file name.
    """
    report_path = app_name.replace(' ', '_') + '.txt'
    # ``with`` closes the file even when a malformed row raises part-way through.
    with open(report_path, 'w') as f:
        for row_pos in range(df.shape[0]):
            dict_ = df.iloc[row_pos, :].to_dict()
            f.write('%s   %s   %s\n' % (dict_['app_name'], dict_['media_source'], dict_['campaign']))
            f.write('Installs: %s\n' % (dict_['installs']))
            f.write('Anomaly Index: %s\n' % (dict_['anomaly_index']))

            # The per-column details arrive as the string form of a dict;
            # literal_eval parses it without executing arbitrary code.
            if isinstance(dict_['anomaly_values_dict'], str):
                anomaly_values_dict = ast.literal_eval(dict_['anomaly_values_dict'])
                for key, value in anomaly_values_dict.items():
                    # Only nested dicts describe a column; other entries are skipped.
                    if isinstance(value, dict):
                        f.write('Column: %s\n' % (key))
                        for k, v in value.items():
                            f.write('\t%s: %s\n' % (k, round(v, 3)))
                        f.write('\n')

            if dict_['share'] <= 100:
                f.write('Column: %s\n' % ('new_users_share'))
                f.write('\tnew_users_share: %s\n\n' % (round(dict_['share'], 3)))

            if dict_['fraud_search_share'] <= 1:
                f.write('Column: %s\n' % ('fraud_search_share'))
                f.write('\tfraud_search_share: %s\n' % (round(dict_['fraud_search_share'], 3)))

            if dict_['af_searches_anomaly'] == 1:
                af_cluster_dict = ast.literal_eval(dict_['cluster_dist_value'])
                f.write('Column: %s\n' % ('AppsFlyer Search Event'))
                # Cluster names and distances are paired by position, as before;
                # list() keeps this working where values() is a view (Python 3).
                cluster_names = list(af_cluster_dict['cluster'].values())
                distances = list(af_cluster_dict['distance'].values())
                for cluster_name, distance in zip(cluster_names, distances):
                    f.write('\t%s: %s\n' % (cluster_name, round(distance, 3)))

            if dict_['bad_site_share_anomaly'] == 1:
                f.write('Column: %s\n' % ('bad_site_share'))
                f.write('\tbad_site_share: %s\n' % (round(dict_['installs_visited_bad_site_share'], 3)))

            f.write('==========================================\n\n')
def write_files_with_anomaly_campaing_describtion(app_name_list, result_for_describe):
    """Write one anomaly report file per application.

    :param app_name_list: application names to report on.
    :param result_for_describe: DataFrame with an ``app_name`` column; the rows
        matching each name are passed to ``write_file_with_anomaly_campaing``.
    """
    for app_name in app_name_list:
        # The parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print('Start for app %s' % (app_name))
        app_rows = result_for_describe[result_for_describe['app_name'] == app_name]
        write_file_with_anomaly_campaing(app_rows, app_name)
def get_columns(column_list):
    """Return every non-empty proper subset of *column_list*.

    Subsets are produced in the order of the 0/1 selection masks generated by
    ``itertools.product([0, 1], repeat=len(column_list))``, and each subset
    keeps the column order of *column_list*. The empty subset and the full
    list are excluded.

    :param column_list: sequence of column names.
    :return: list of lists of column names; empty when *column_list* has
        fewer than two elements.
    """
    masks = list(itertools.product([0, 1], repeat=len(column_list)))
    # The first mask selects nothing and the last selects everything; both are
    # dropped. ``compress`` keeps exactly the items whose mask bit is 1.
    return [list(itertools.compress(column_list, mask)) for mask in masks[1:-1]]
def make_total(df, column_list_to_group):
    """Append aggregated "Total" rows to *df* for each column grouping.

    For every list of columns in *column_list_to_group*, ``installs`` is summed
    per ``fielddate`` plus those columns. The aggregated rows are appended
    below the original ones, and every missing value in the combined frame is
    replaced with the string ``'Total'``.

    :param df: DataFrame containing ``fielddate``, ``installs`` and every
        column named in the groupings.
    :param column_list_to_group: iterable of lists of column names.
    :return: a new DataFrame; *df* is not modified. With no groupings an
        unchanged copy of *df* is returned.
    """
    grouped_frames = [
        df.groupby(['fielddate'] + columns)['installs'].sum().reset_index()
        for columns in column_list_to_group
    ]
    if not grouped_frames:
        return df.copy()
    # One concat/fillna over all pieces instead of re-copying the growing
    # frame on every iteration; the resulting rows and order are the same.
    return pd.concat([df] + grouped_frames).fillna('Total')
def week_start(x):
    """Return the Monday of the week containing *x* as ``'YYYY-MM-DD'``.

    :param x: any value whose ``str()`` is a date in ``%Y-%m-%d`` format.
    :return: the ISO date string of that week's Monday, or ``None`` when *x*
        cannot be parsed as such a date.
    """
    try:
        day = datetime.datetime.strptime(str(x), '%Y-%m-%d')
        monday = day - datetime.timedelta(days=day.weekday())
    except (ValueError, OverflowError):
        # ValueError: not a %Y-%m-%d date; OverflowError: the Monday would fall
        # before the earliest representable date. Unlike a bare ``except``,
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return None
    return monday.date().isoformat()
# Weekly anomaly tables to load, one per detection signal. The last path
# component of each entry becomes its key in `dict_of_df` (see
# load_all_bad_campaign_info_from_hahn, which takes component index 4).
table_path_list = [
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_as_searches',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_fraud_searches_share',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_meta_data',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_new_users_share',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_bad_site_share',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_metrika_actions',
    '$job_root/antifraud/weeks/anomaly_campaign/anomaly_campaign_by_screen_size',
]
# Load every anomaly table, then reduce each one to the join key
# (app_name, media_source, campaign) plus its `*_anomaly` flag column(s).
dict_of_df = load_all_bad_campaign_info_from_hahn(table_path_list)

# Bad-site share: keep the key and the flag, and only rows where the flag is 1.
dict_of_df['anomaly_campaign_by_bad_site_share'] = dict_of_df['anomaly_campaign_by_bad_site_share'][['app_name', 'media_source', 'campaign', 'bad_site_share_anomaly']]
dict_of_df['anomaly_campaign_by_bad_site_share'] = dict_of_df['anomaly_campaign_by_bad_site_share'][dict_of_df['anomaly_campaign_by_bad_site_share']['bad_site_share_anomaly']==1]

# AppsFlyer searches: same treatment for `af_searches_anomaly`.
dict_of_df['anomaly_campaign_by_as_searches'] = dict_of_df['anomaly_campaign_by_as_searches'][['app_name', 'media_source', 'campaign', 'af_searches_anomaly']]
dict_of_df['anomaly_campaign_by_as_searches'] = dict_of_df['anomaly_campaign_by_as_searches'][dict_of_df['anomaly_campaign_by_as_searches']['af_searches_anomaly']==1]

# Screen size: the table only carries `campaign_extended_name`, which is split
# on '__' into app_name / media_source / campaign; every listed row is flagged.
# NOTE(review): the columns below are assigned onto a frame obtained by column
# selection, which may emit pandas' SettingWithCopyWarning -- confirm.
dict_of_df['anomaly_campaign_by_screen_size'] = dict_of_df['anomaly_campaign_by_screen_size'][['campaign_extended_name']]
dict_of_df['anomaly_campaign_by_screen_size']['app_name'] = dict_of_df['anomaly_campaign_by_screen_size']['campaign_extended_name'].apply(lambda x: x.split('__')[0])
dict_of_df['anomaly_campaign_by_screen_size']['media_source'] = dict_of_df['anomaly_campaign_by_screen_size']['campaign_extended_name'].apply(lambda x: x.split('__')[1])
dict_of_df['anomaly_campaign_by_screen_size']['campaign'] = dict_of_df['anomaly_campaign_by_screen_size']['campaign_extended_name'].apply(lambda x: x.split('__')[2])
dict_of_df['anomaly_campaign_by_screen_size']['screen_size_anomaly'] = 1
dict_of_df['anomaly_campaign_by_screen_size'].drop('campaign_extended_name', axis=1, inplace=True)

# Metrika actions: keep rows whose flag equals the *string* '1' (the other
# tables compare against the integer 1) and rename the flag column.
dict_of_df['anomaly_campaign_by_metrika_actions'] = dict_of_df['anomaly_campaign_by_metrika_actions'][['app_name', 'media_source', 'campaign', 'is_fraud_metrika_actions']]
dict_of_df['anomaly_campaign_by_metrika_actions'] = dict_of_df['anomaly_campaign_by_metrika_actions'][dict_of_df['anomaly_campaign_by_metrika_actions']['is_fraud_metrika_actions']=='1']
dict_of_df['anomaly_campaign_by_metrika_actions']['metrika_action_anomaly'] = dict_of_df['anomaly_campaign_by_metrika_actions']['is_fraud_metrika_actions']
dict_of_df['anomaly_campaign_by_metrika_actions'].drop('is_fraud_metrika_actions', axis=1, inplace=True)

# Fraud searches share: drop the raw share; all remaining columns are merged.
dict_of_df['anomaly_campaign_by_fraud_searches_share'].drop('fraud_search_share', axis = 1, inplace=True)

# New users share: the `new_users_share` column is relabelled as
# `new_users_share_anomaly` by overwriting the column names positionally.
# NOTE(review): presumably the source column already holds a 0/1 flag -- verify.
dict_of_df['anomaly_campaign_by_new_users_share']= dict_of_df['anomaly_campaign_by_new_users_share'][['app_name', 'media_source', 'campaign', 'new_users_share']]
dict_of_df['anomaly_campaign_by_new_users_share'].columns = ['app_name', 'media_source', 'campaign', 'new_users_share_anomaly']

# Meta data: drop the details column and suffix every non-key column with
# '_anomaly'.
dict_of_df['anomaly_campaign_by_meta_data'].drop('anomaly_values_dict', axis=1, inplace=True)
column_dict = {}
for col in dict_of_df['anomaly_campaign_by_meta_data'].columns:
    if col in ['app_name', 'campaign', 'media_source']:
        column_dict[col] = col
    else:
        column_dict[col] = col + '_anomaly'
dict_of_df['anomaly_campaign_by_meta_data'].rename(columns = column_dict, inplace=True)
# Left-join every anomaly table onto the per-campaign install counts; campaigns
# absent from a table get 0 for that table's flags.
all_installs = load_all_installs_by_campaign('$job_root/antifraud/weeks/installs_by_campaign')
result = merge_all(all_installs, dict_of_df, columns_list = ['app_name', 'media_source', 'campaign'], how = 'left').fillna(0)
# Cast every column from position 4 onwards to int.
# NOTE(review): this is positional -- it assumes the first four columns are the
# non-flag columns of the installs table; confirm the column order.
result.iloc[:, 4:] = result.iloc[:, 4:].astype(int)
# Report date: the Monday of the week that contains the day seven days ago.
result['fielddate'] = week_start(str(datetime.datetime.now() - datetime.timedelta(days = 7)).split(' ')[0])
# Preinstall campaigns never count as anomalous: zero the columns from
# position 4 up to, but not including, the last column (`fielddate`).
# NOTE(review): `result['campaign'][i]` is a label lookup; it matches the
# positional `iloc` only while the index is 0..n-1 -- confirm.
for i in xrange(result.shape[0]):
    if result['campaign'][i] == 'preinstall':
        result.iloc[i,4:result.shape[1]-1] = 0

# Build the copy that is written to the visualisation table: add a combined
# `name` and a `fraud_index` (sum of four flags), then rename columns so that
# `installs` becomes `size`, `name` is kept, `fraud_index` gets an `n__` prefix
# and everything else gets an `s__` prefix.
result_copy = result.copy()
result_copy['name'] = result['app_name'] + '__' + result['media_source'] + '__' + result['campaign']
result_copy['fraud_index'] = result_copy[['new_users_share_anomaly', 'screen_size_anomaly', 'metrika_action_anomaly', 'af_searches_anomaly']].sum(axis=1)
column_dict = {}
for col in result_copy.columns:
    if col == 'installs':
        column_dict[col] = 'size'
    elif col == 'name':
        column_dict[col] = 'name'
    elif col == 'fraud_index':
        column_dict[col] = 'n__' + col
    else:
        column_dict[col] = 's__' + col
result_copy.rename(columns = column_dict, inplace=True)
# Only campaigns with more than 10 installs are appended to the table.
result_list = []
for i in xrange(result_copy.shape[0]):
    _dict = result_copy.iloc[i,:].to_dict()
    if _dict['size'] > 10:
        result_list.append(Record(**_dict))
cluster.write('$job_root/antifraud/weeks/installs_by_campaign_to_vis', result_list, append=True)

# Statistics for the report: a campaign is marked as fraud ('1') when more than
# one of the four flags is set; installs are then summed per dimension
# combination and "Total" rows are added for every proper subset of dimensions.
result['fraud_index'] = result[['new_users_share_anomaly', 'screen_size_anomaly', 'metrika_action_anomaly', 'af_searches_anomaly']].sum(axis=1)
result['is_fraud'] = result['fraud_index'].apply(lambda x: '1' if x > 1 else '0').astype(str)
result_for_stat = result[['fielddate','app_name', 'media_source', 'campaign', 'is_fraud', 'installs']]
result_for_stat = result_for_stat.groupby(['fielddate', 'app_name', 'media_source', 'campaign', 'is_fraud'])['installs'].sum().reset_index()
column_list = ['app_name', 'media_source', 'campaign', 'is_fraud']
column_list_to_group = get_columns(column_list)
result_for_stat = make_total(result_for_stat, column_list_to_group)
import requests
# Report definition posted to the statistics service: one date dimension, four
# string dimensions (each shown as a selector defaulting to "Total") and a
# single numeric measure, `installs`.
yaml_config = u'''
---
allow_recalculate: "1"
autovalues_enabled: "0"
dimensions:
  - fielddate: date
  - app_name: string
  - media_source: string
  - campaign: string
  - is_fraud: string
measures:
  - installs: number
view_types:
  app_name:
    type: Selector
    default: Total
  media_source:
    type: Selector
    default: Total
  campaign:
    type: Selector
    default: Total
  is_fraud:
    type: Selector
    default: Total
aggregate_uncomplete_period: "0"
'''
# Create or update the report configuration.
# NOTE(review): the robot password is hard-coded here; rotate it and load it
# from the environment or a secrets store.
# NOTE(review): `resp` is never inspected, so a failed upload goes unnoticed.
resp = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/config',
    headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
    data={
        'cube_config': yaml_config,'title': 'AntiFraud Installs', 'name': 'Distribution/ktereshin/antifraud/installs'
    },
)
import re
# Serialise the statistics as ';'-separated CSV without the index and strip the
# single trailing newline before upload.
csv_data = re.sub('\n$', '', result_for_stat.to_csv(sep=';', index=False))
# Upload the rows to the report with scale 'w' (presumably weekly -- confirm
# against the service documentation).
# NOTE(review): the robot password is hard-coded here as well, and `r` is never
# checked for an error status.
r = requests.post(
    'https://upload.stat.yandex-team.ru/_api/report/data',
    headers={'StatRobotUser': 'robot_ktereshin', 'StatRobotPassword': '3en5cayAppeif6l'},
    data={
        'name': 'Distribution/ktereshin/antifraud/installs',
        'scale': 'w',
        'data': csv_data,
    },
)
