#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from crypta.lookalike.lib.python.utils.config.environment import environment
from crypta.lookalike.lib.python.utils.config import (
    production_config,
    testing_config,
)


# True only when the imported `environment` value is exactly 'production';
# for any other value the settings below are read from testing_config.
is_production = environment == 'production'

# Names of the categorical feature groups, in a fixed order.
categorical_feature_types = (
    'heuristic_common',
    'longterm_interests',
    'main_region_city',
    'trainable_segments',
)
# Keyword string associated with each categorical feature group
# (one entry per name listed in categorical_feature_types).
categorical_feature_name_to_keyword = dict(
    heuristic_common='547',
    longterm_interests='601',
    main_region_city='city',
    trainable_segments='546',
)

RELEASED = 'prestable' if is_production else 'testing'

# Pick the settings module once; every environment-specific value below is
# read from it, so only the selected module's attributes are accessed.
# The leading underscore keeps the helper name out of `import *`.
_env_config = production_config if is_production else testing_config

CRYPTA_YT_WORKING_DIR = _env_config.CRYPTA_YT_WORKING_DIR
CRYPTA_ML_JUGGLER_HOST = _env_config.CRYPTA_ML_JUGGLER_HOST
SEGMENTS_NUM_FOR_VALIDATION = _env_config.SEGMENTS_NUM_FOR_VALIDATION
SEGMENTS_NUM_FOR_TEST = _env_config.SEGMENTS_NUM_FOR_TEST
MIN_USERS_PER_SEGMENT = _env_config.MIN_USERS_PER_SEGMENT
MIN_USERS_PER_APP = _env_config.MIN_USERS_PER_APP
POSITIVES_VOLUME = _env_config.POSITIVES_VOLUME
USER_DATA_SAMPLING_RATE = _env_config.USER_DATA_SAMPLING_RATE
MAX_SEGMENTS_PER_TYPE = _env_config.MAX_SEGMENTS_PER_TYPE
MAX_GOALS_PER_TYPE = _env_config.MAX_GOALS_PER_TYPE

# Thresholds for RMP goals (the consumers live outside this module).
MIN_USERS_PER_RMP_GOAL = 100
DAYS_FOR_RMP_GOALS = 7

# Numeric id bounds for goals and for audience segments.
# NOTE(review): whether the END values are inclusive is decided by the
# consumers and is not visible here.
GOALS_IDS_START = 100
GOALS_IDS_END = int(1e9)
AUDIENCE_SEGMENTS_IDS_START = int(2e9)
# 2 499 000 000. The former literal `2499e8` evaluated to 249 900 000 000,
# two orders of magnitude past the 2e9 start of the range.
AUDIENCE_SEGMENTS_IDS_END = int(2499e6)

# Size limits and a look-back window. int(10e6) evaluates to 10000000.
UPPER_LIMIT_FOR_IDS_IN_SEGMENT = int(10e6)
EMBEDDING_FEATURES_SIZE = 512
DAYS_TO_CALCULATE_LOWER_BOUND = 14

# Class counts for the gender / age / income features and the size of the
# top-cities list.
# NOTE(review): whether these counts include an "unknown" class is not
# visible in this module - confirm against the consumers.
GENDER_CLASSES_NUM = 3
AGE_CLASSES_NUM = 7
INCOME_CLASSES_NUM = 6
TOP_CITIES_NUMBER = 300

# Model version retention.
VERSIONS_TO_KEEP_NUM = 3
# site2vec is retrained on the 15th day
# NOTE(review): presumably a day of the month picked to fall after that
# retraining - confirm with the code that saves the model.
DATE_TO_SAVE_MODEL_PERMANENTLY = 20

# features export: numeric parameters only; the YT paths used by the export
# are defined at the end of this module (FEATURES_EXPORT_DIR and friends).
DAYS_BACK_FOR_REACHED_GOALS = 35
DAYS_WITHOUT_UPDATE_GOALS = 30
AFFINITIVE_SITES_TO_STORE_FOR_GOALS = 40
AFFINITIVE_APPS_TO_STORE_FOR_GOALS = 40
AFFINITIVE_WORDS_TO_STORE_FOR_GOALS = 40
GOAL_FEATURES_FOR_CAESAR_DAILY_TABLES_TTL_DAYS = 14

# retraining params
# NOTE(review): presumably thresholds on observed changes that trigger a
# retrain - confirm how the consumers compare against them.
DELETED_SEGMENTS_NUMBER_TO_RETRAIN = 1
NEW_SEGMENTS_NUMBER_TO_RETRAIN = 5

TOP_CITIES_UPDATE_NUMBER_TO_RETRAIN = 15
# strftime/strptime pattern producing dates such as '2020-01-31'.
DATE_FORMAT = '%Y-%m-%d'

# Segment type names. This is a mutable list shared by every importer.
SEGMENT_TYPES = ['goal', 'audience', 'metrika', 'rmp_goal', 'app']

# The same YT proxy value is used for every environment.
YT_PROXY = 'hahn'

# Temporary locations; with POSIX path joining COMMON_YQL_TMP_DIRECTORY
# is '//tmp/yql'.
COMMON_TMP_DIRECTORY = '//tmp'
COMMON_YQL_TMP_DIRECTORY = os.path.join(COMMON_TMP_DIRECTORY, 'yql')

# Root of the lookalike tree under the environment-specific
# CRYPTA_YT_WORKING_DIR, its two version directories and the DataLens subtree.
LOOKALIKE_DIRECTORY = os.path.join(CRYPTA_YT_WORKING_DIR, 'lookalike')
LOOKALIKE_VERSIONS_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'versions')
LOOKALIKE_MONTHLY_VERSIONS_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'versions_monthly')
DATALENS_LOOKALIKE_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'datalens')

# Nodes inside the DataLens subtree.
DATALENS_LOOKALIKE_QUALITY_TABLE = os.path.join(DATALENS_LOOKALIKE_DIRECTORY, 'quality')
DATALENS_LOOKALIKE_COUNTS_TABLE = os.path.join(DATALENS_LOOKALIKE_DIRECTORY, 'counts')
DATALENS_LOOKALIKE_PRISM_CORRELATION = os.path.join(DATALENS_LOOKALIKE_DIRECTORY, 'prism_lal_correlation')

# common directories (siblings of the lookalike tree under
# CRYPTA_YT_WORKING_DIR)
LAB_DIRECTORY = os.path.join(CRYPTA_YT_WORKING_DIR, 'lab')
PROFILES_YT_DIRECTORY = os.path.join(CRYPTA_YT_WORKING_DIR, 'profiles')

# Nodes under LOOKALIKE_DIRECTORY. GOALS_TABLE and METRIKA_TABLE sit one
# level deeper, inside a directory that has the same name as the table.
LAL_FEATURES_MAPPING_TABLE = os.path.join(LOOKALIKE_DIRECTORY, 'features_mapping')
SEGMENTS_WITH_COUNTS_TABLE = os.path.join(LOOKALIKE_DIRECTORY, 'segments_with_counts')
SEGMENTS_STATS = os.path.join(LOOKALIKE_DIRECTORY, 'segments_stats')
ADS_TYPES_STATS = os.path.join(LOOKALIKE_DIRECTORY, 'ads_types_stats')
GOALS_TABLE = os.path.join(LOOKALIKE_DIRECTORY, 'goal_audiences', 'goal_audiences')
METRIKA_TABLE = os.path.join(LOOKALIKE_DIRECTORY, 'metrika_segments', 'metrika_segments')

# segments and goals used for ads: one directory under LOOKALIKE_DIRECTORY
# holding the four nodes below
ADS_GOALS_SEGMENTS_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'ads_goals_segments')
ADS_UPDATE_DATES_TABLE = os.path.join(ADS_GOALS_SEGMENTS_DIRECTORY, 'update_dates')
MEANINGFUL_GOALS_IDS_TABLE = os.path.join(ADS_GOALS_SEGMENTS_DIRECTORY, 'meaningful_goals_ids')
RETARGETING_IDS_TABLE = os.path.join(ADS_GOALS_SEGMENTS_DIRECTORY, 'retargeting_ids')
MULTIPLIERS_IDS_TABLE = os.path.join(ADS_GOALS_SEGMENTS_DIRECTORY, 'multipliers_ids')

# apps: located under CRYPTA_YT_WORKING_DIR/ids_storage, outside the
# lookalike tree
APP_BY_DEVID_MONTHLY = os.path.join(CRYPTA_YT_WORKING_DIR, 'ids_storage', 'device_id', 'app_metrica_month')

# LaL training
LAL_TRAINING_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'training')


def _in_training_dir(node_name):
    """Return the path of `node_name` directly under LAL_TRAINING_DIRECTORY."""
    return os.path.join(LAL_TRAINING_DIRECTORY, node_name)


# Tables produced or consumed by training.
TRAINING_SEGMENTS_TABLE = _in_training_dir('ads_segments')
TRAINING_GOALS_TABLE = _in_training_dir('ads_goals')
SEGMENTS_FOR_LAL_TRAINING_TABLE = _in_training_dir('segments_for_28_days')
RANKED_USER_DATA_YANDEXUIDS_TABLE = _in_training_dir('ranked_user_data_yandexuids')
USER_DSSM_FEATURES_TABLE = _in_training_dir('user_data_dssm_features')
SEGMENTS_DSSM_FEATURES_TABLE = _in_training_dir('segments_dssm_features')
SEGMENTS_USER_DATA_STATS_TABLE = _in_training_dir('segments_user_data_stats')
POSITIVES_WITH_DSSM_FEATURES_TABLE = _in_training_dir('positives_with_dssm_features')
NEGATIVES_TABLE = _in_training_dir('negatives')
TRAIN_SEGMENTS_WITH_COUNTS_TABLE = _in_training_dir('segments_with_counts')
TRAIN_SAMPLE_TABLE = _in_training_dir('train_sample')
VALIDATION_SAMPLE_TABLE = _in_training_dir('validation_sample')
# Files (not tables) kept in the same directory.
SEGMENTS_DICT_FILE = _in_training_dir('segments_dict.json')
DSSM_MODEL_FILE = _in_training_dir('dssm_model.applier')

# LaL test
LAL_TEST_DIRECTORY = os.path.join(LOOKALIKE_DIRECTORY, 'test')


def _in_test_dir(node_name):
    """Return the path of `node_name` directly under LAL_TEST_DIRECTORY."""
    return os.path.join(LAL_TEST_DIRECTORY, node_name)


TEST_USERS_TABLE = _in_test_dir('users')
TEST_SEGMENTS_WITH_COUNTS_TABLE = _in_test_dir('segments_with_counts')
TEST_SEGMENTS_DSSM_FEATURES_TABLE = _in_test_dir('segments_dssm_features')
TEST_SEGMENTS_DSSM_VECTORS = _in_test_dir('segments_dssm_vectors')
TEST_USERS_DSSM_VECTORS = _in_test_dir('users_dssm_vectors')
TEST_DSSM_LAL_DISTANCES = _in_test_dir('dssm_lal_distances')
TEST_DSSM_SEGMENTS_POINTS = _in_test_dir('dssm_segments_points')
TEST_RANDOM_SEGMENTS_POINTS = _in_test_dir('random_segments_points')
TEST_DSSM_PR_STATS = _in_test_dir('dssm_pr_stats')
TEST_RANDOM_PR_STATS = _in_test_dir('random_pr_stats')
TEST_LAL_METRICS = _in_test_dir('lal_metrics')
TEST_LOWER_BOUNDS = _in_test_dir('lower_bounds')

# Audience tables are hard-coded production paths, so they are the same in
# every environment.
AUDIENCE_SEGMENTS_INFO_TABLE = '//home/audience/production/export/segments_simple'
AUDIENCE_SEGMENTS_USERS_TABLE = '//home/crypta/production/audience/segments/Full'

LAB_DATA_DIRECTORY = os.path.join(LAB_DIRECTORY, 'data')
# replace() rewrites every 'testing' substring of the joined path to
# 'production', so this path never contains 'testing'.
# NOTE(review): this relies on 'testing' appearing only as the environment
# component of CRYPTA_YT_WORKING_DIR - confirm against testing_config.
LAB_SEGMENTS_TABLE = os.path.join(LAB_DIRECTORY, 'database', 'Segment').replace('testing', 'production')
# UserData tables under the lab data directory.
USER_DATA_TABLE = os.path.join(LAB_DATA_DIRECTORY, 'mixed', 'UserData')
USER_DATA_BY_CRYPTAID_TABLE = os.path.join(LAB_DATA_DIRECTORY, 'crypta_id', 'UserData')
USER_DATA_STATS_TABLE = os.path.join(LAB_DATA_DIRECTORY, 'crypta_id', 'UserDataStats')

# Nodes under the profiles directory. The second path passes a multi-level
# relative path as a single join component.
CATEGORICAL_FEATURES_MATCHING_DIR = os.path.join(PROFILES_YT_DIRECTORY, 'input-data', 'categorical_features')
SEGMENTS_STORAGE_BY_YANDEXUID = os.path.join(PROFILES_YT_DIRECTORY, 'stages/merged-segments/segments_storage_by_yandexuid')

# matching: the two tables under the profiles matching directory
PROFILES_MATCHING_DIRECTORY = os.path.join(PROFILES_YT_DIRECTORY, 'matching')
CRYPTAID_YANDEXUID_MATCHING_TABLE = os.path.join(PROFILES_MATCHING_DIRECTORY, 'cryptaid_yandexuid')
YANDEXUID_CRYPTAID_MATCHING_TABLE = os.path.join(PROFILES_MATCHING_DIRECTORY, 'yandexuid_cryptaid')

# Public matching root is a fixed path, independent of the environment.
PUBLIC_MATCHING_DIRECTORY = '//home/crypta/public/matching/by_id'
# NOTE(review): presumably the id types looked up under
# PUBLIC_MATCHING_DIRECTORY - confirm against the consumers.
ID_TYPES = ['idfa', 'gaid', 'ifv', 'oaid']

# logs for ads segments and goals
MEANINGFUL_GOALS_DIR = '//statbox/cube/daily/comdep/hypercubes/v1/campaigns'
# Always built from production_config, regardless of is_production, so the
# two derived directories below are production paths in every environment.
BUCHHALTER_DIR = os.path.join(production_config.CRYPTA_YT_WORKING_DIR, 'buchhalter')
RETARGETING_DIR = os.path.join(BUCHHALTER_DIR, 'retargeting_stats')
MULTIPLIERS_DIR = os.path.join(BUCHHALTER_DIR, 'multipliers', 'retargeting_id_stats')
# Fixed paths outside the Crypta working directory.
AUTOBUDGET_GOALS_LOG_TABLE = '//home/yabs/dict/AutoBudgetOrderWithHistory'
POSTBACK_MOBILE_LOG = '//logs/bs-uniform-postback-log/1d'
MOBILE_GOALS_TRACKER_TABLE = '//home/adv/DirectMobileGoalsExternalTracker'

# correlation validation: kept inside the LaL test directory
PRISM_VALIDATION_DIR = os.path.join(LAL_TEST_DIRECTORY, 'prism_validation')
CURRENT_PRISM_LAL = os.path.join(PRISM_VALIDATION_DIR, 'current_lal')
PREVIOUS_PRISM_LAL = os.path.join(PRISM_VALIDATION_DIR, 'previous_lal')

# experiments directory: 'training' and 'test' subdirectories whose node
# names repeat those used under LAL_TRAINING_DIRECTORY / LAL_TEST_DIRECTORY
EXPERIMENTS_DIR = os.path.join(LOOKALIKE_DIRECTORY, 'experiments')
EXPERIMENTS_TRAINING_DIR = os.path.join(EXPERIMENTS_DIR, 'training')
EXPERIMENTS_TRAIN_SAMPLE_TABLE = os.path.join(EXPERIMENTS_TRAINING_DIR, 'train_sample')
EXPERIMENTS_USER_DSSM_FEATURES_TABLE = os.path.join(EXPERIMENTS_TRAINING_DIR, 'user_data_dssm_features')
EXPERIMENTS_SEGMENTS_FOR_LAL_TRAINING_TABLE = os.path.join(EXPERIMENTS_TRAINING_DIR, 'segments_for_28_days')
EXPERIMENTS_SEGMENTS_USER_DATA_STATS_TABLE = os.path.join(EXPERIMENTS_TRAINING_DIR, 'segments_user_data_stats')

EXPERIMENTS_TEST_DIR = os.path.join(EXPERIMENTS_DIR, 'test')
EXPERIMENTS_SEGMENTS_DSSM_FEATURES_TABLE = os.path.join(EXPERIMENTS_TEST_DIR, 'segments_dssm_features')
EXPERIMENTS_RANDOM_PR_STATS = os.path.join(EXPERIMENTS_TEST_DIR, 'random_pr_stats')
EXPERIMENTS_TEST_SEGMENTS_WITH_COUNTS_TABLE = os.path.join(EXPERIMENTS_TEST_DIR, 'segments_with_counts')
# Unlike the EXPERIMENTS_* nodes above, this one lives under
# LAL_TEST_DIRECTORY, not under the experiments tree.
TEST_BASELINE_PR_STATS = os.path.join(LAL_TEST_DIRECTORY, 'baseline_pr_stats')

# siberia: fixed production path, the same in every environment
FOR_DESCRIPTION_BY_CRYPTAID_TABLE = '//home/crypta/production/siberia/custom/crypta_id_user_data/by_crypta_id'

# features export: paths under LOOKALIKE_DIRECTORY/features_export; the
# 'latest' node sits inside the daily directory
FEATURES_EXPORT_DIR = os.path.join(LOOKALIKE_DIRECTORY, 'features_export')
GOAL_FEATURES_FOR_CAESAR_DAILY_DIR = os.path.join(FEATURES_EXPORT_DIR, 'goal_daily')
GOAL_FEATURES_FOR_CAESAR_FULL_TABLE = os.path.join(FEATURES_EXPORT_DIR, 'goal_full')
GOAL_FEATURES_FOR_CAESAR_DAILY_LATEST_TABLE = os.path.join(GOAL_FEATURES_FOR_CAESAR_DAILY_DIR, 'latest')
# Fixed path outside the Crypta working directory.
CAESAR_GOALS_DUMP_LATEST_TABLE = '//home/bs/logs/AdsCaesarGoalsDump/latest'
