import posixpath
from os.path import join

# Size-related constants; all of them are plain ints.
MAX_SAMPLE_SIZE = 400 * 1000
MAX_SEGMENT_SIZE = 10 ** 7
VECTOR_SIZE = 512

# Validation sample parameters.
VALIDATION_SAMPLE_PERCENTAGE = 10
VALIDATION_SAMPLE_REST = 0

# Minimum id counts.
# NOTE(review): these evaluate to floats (40000.0 and 900.0), unlike the int
# sizes above -- confirm whether ints were intended.
MIN_UNIQUE_IDS_CNT = 4e4
MIN_IDS_FOR_CLASS_CNT = 9e2

# Score thresholds, listed from lowest to highest value.
MIXED_UP_CLASSES_THRESHOLD = -0.2
THRESHOLD_FOR_NEW_SAMPLE_ADDING = -0.05
MODEL_UNSURE_SCORE_THRESHOLD = 0.15
MODEL_APPROVING_SCORE_THRESHOLD = 0.3

# Names of the segment feature types (immutable tuple).
segment_feature_types = (
    'heuristic_common',
    'longterm_interests',
    'gender',
    'age_segments',
    'income_5_segments',
)

# Names of the user data features; kept as a list, not a tuple.
user_data_features = [
    'Vectors',
    'Attributes',
    'Segments',
]

# Metric names, in display order.
metrics_to_show = (
    'roc_auc',
    'accuracy.test',
    'logloss.test',
    'train_sample_size',
    'train_distribution.positive',
    'train_distribution.negative',
)

# Column names of the train sample.
train_sample_columns = (
    'id',
    'id_type',
    'segment_name',
    'retro_date',
)

# The two class labels.
labels = (
    'positive',
    'negative',
)

# Maps an id type to a column name. The '*_md5' id types map to the same
# column as their plain counterparts.
id_type_to_column = {
    # phone / email, plain and md5
    'phone': 'phone',
    'phone_md5': 'phone',
    'email': 'email',
    'email_md5': 'email',

    # remaining id types
    'uuid': 'UUID',
    'gaid': 'GAID',
    'idfa': 'IDFA',
    'duid': 'ClientID',
}

# Every column that appears as a value in id_type_to_column, followed by
# 'target' and 'retro_date'.
columns_for_classification = (
    'phone',
    'email',
    'ClientID',
    'IDFA',
    'GAID',
    'UUID',
    'target',
    'retro_date',
)

# Sample / segment name constants; each value is the constant's own name in
# lower case.
RAW_SAMPLE = 'raw_sample'

# puid-based samples
SAMPLE_BY_PUID = 'sample_by_puid'
COMBINED_SAMPLE_BY_PUID = 'combined_sample_by_puid'

# yuid-based samples
SAMPLE_BY_YUID = 'sample_by_yuid'
SAMPLE_BY_YUID_VALIDATION = 'sample_by_yuid_validation'

RESULTING_SEGMENTS = 'resulting_segments'

# Default values (both plain ints).
DEFAULT_OUTPUT_SEGMENT_SIZE = 10 ** 6
DEFAULT_NUMBER_OF_TOP_FEATURES = 10

# Location of the identifier UDF shared library.
IDENTIFIER_UDF_PATH = 'yt://hahn/home/crypta/public/udfs/stable/libcrypta_identifier_udf.so'

# tables
#
# Every path below is a '/'-separated storage path, not a local filesystem
# path, so it is built with posixpath.join. os.path.join would insert the host
# OS separator (a backslash on Windows) and produce a mixed, invalid path.
PROFILES_DIRECTORY = '//home/crypta/production/profiles'

SEGMENTS_DIRECTORY = posixpath.join(PROFILES_DIRECTORY, 'segments')
CATEGORICAL_FEATURES_CUSTOM_ML_MATCHING_DIR = posixpath.join(
    SEGMENTS_DIRECTORY, 'custom_ml', 'categorical_features',
)
CATBOOST_FEATURES = posixpath.join(SEGMENTS_DIRECTORY, 'custom_ml', 'catboost_features')
TRAINABLE_SEGMENTS_DATA_DIRECTORY = posixpath.join(SEGMENTS_DIRECTORY, 'external_data')
EXISTING_MODEL_PREDICTIONS = posixpath.join(SEGMENTS_DIRECTORY, 'raw_output')

LAB_SEGMENTS_INFO_TABLE = posixpath.join(PROFILES_DIRECTORY, 'export', 'lab', 'segments')
TRAINABLE_SEGMENTS_DIRECTORY = posixpath.join(
    PROFILES_DIRECTORY, 'segment_parts', 'trainable_segments',
)
USER_DATA_TABLE = '//home/crypta/production/lab/data/UserData'

# matching
MATCHING_DIRECTORY = posixpath.join(PROFILES_DIRECTORY, 'matching')
INDEVICE_YANDEXUID = posixpath.join(MATCHING_DIRECTORY, 'indevice_yandexuid')
YANDEXUID_CRYPTAID_TABLE = posixpath.join(MATCHING_DIRECTORY, 'yandexuid_cryptaid')
CRYPTA_PUBLIC_DIRECTORY = '//home/crypta/public'
# Contains a '{}' placeholder (presumably filled with an id type via
# str.format -- verify against callers).
MATCHING_TABLE_TEMPLATE = posixpath.join(CRYPTA_PUBLIC_DIRECTORY, 'matching/by_id/{}/direct/puid')
DUID_MATCHING = posixpath.join(CRYPTA_PUBLIC_DIRECTORY, 'ids_storage/duid/index')

# TVM ids.
CUSTOM_ML_TVM_ID = 2033096
PROFILE_TVM_ID = 2009855

# Siberia connection settings. Host and TVM id are keyed by environment name
# ('testing' / 'stable'); the port is a single value.
SIBERIA_PORT = 80
SIBERIA_HOST = dict(
    testing='siberia-test.crypta.yandex.net',
    stable='siberia.crypta.yandex.net',
)
SIBERIA_TVM_ID = dict(
    testing=2017435,
    stable=2017433,
)
